More accurate names for cv and Gridsearch results #117

Merged: 7 commits, Jul 2, 2024
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -13,6 +13,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
also removes the need to forward the `mock_label` and `group_label` arguments to the underlying optimizer.
The use of the `mock_label` and `group_label` arguments has been removed without deprecation.
(https://github.com/mad-lab-fau/tpcp/pull/114)
+ - All classes and methods that produce "grid-search"- or "cross-validate"-like output (`GridSearch`, `GridSearchCv`, `cross_validate`, `validate`)
+   have updated names for all their output attributes.
+   In most cases, the naming has switched from a single underscore to a double underscore to separate the different
+   parts of an output name, making it easier to access the outputs programmatically.

## [0.34.1] - 2024-07-02

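The motivation for the double underscore is easiest to see in code. A minimal sketch (hypothetical result names following the new convention, not code from this PR):

```python
# Hypothetical result names following the new double-underscore convention.
new_style = ["test__agg__f1_score", "test__single__f1_score", "test__data_labels"]

# Splitting on "__" recovers the parts of each name unambiguously ...
print([tuple(name.split("__")) for name in new_style])
# [('test', 'agg', 'f1_score'), ('test', 'single', 'f1_score'), ('test', 'data_labels')]

# ... while the old single-underscore names could not be split programmatically
# without also breaking apart multi-word score names:
print("test_single_f1_score".split("_"))  # ['test', 'single', 'f1', 'score']
```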
14 changes: 9 additions & 5 deletions examples/integrations/_01_tensorflow.py
@@ -162,8 +162,8 @@ def predicted_labels_(self):

@make_optimize_safe
def self_optimize(self, dataset, **_) -> Self:
- data = np.vstack([d.input_as_array() for d in dataset])
- labels = np.hstack([d.labels_as_array() for d in dataset])
+ data = tf.convert_to_tensor(np.vstack([d.input_as_array() for d in dataset]))
+ labels = tf.convert_to_tensor(np.hstack([d.labels_as_array() for d in dataset]))

print(data.shape)
if self._model is not None:
@@ -192,7 +192,7 @@ def self_optimize(self, dataset, **_) -> Self:
def run(self, datapoint) -> Self:
if self._model is None:
raise RuntimeError("Model not trained yet!")
- data = datapoint.input_as_array()
+ data = tf.convert_to_tensor(datapoint.input_as_array())

self.predictions_ = self._model.predict(data)
return self
@@ -262,8 +262,12 @@ def scoring(pipeline, datapoint):

# %%
# We can now look at the results per group:
- cv_results["test_single_accuracy"]
+ cv_results["test__single__accuracy"]

+ # %%
+ # Average first per group and then over all groups:
+ cv_results["test__agg__accuracy"]
+
# %%
# And the overall accuracy as the average over all samples of all groups within a fold:
- cv_results["test_per_sample__accuracy"]
+ cv_results["test__agg__per_sample__accuracy"]
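The two aggregates above differ in how they weight groups. A small numeric sketch with toy numbers (not from the example run):

```python
import numpy as np

# Toy numbers: accuracies and sizes of two groups within one fold.
group_accuracy = np.array([0.90, 0.60])
group_n_samples = np.array([100, 10])

# Averaging per-group scores first (the test__agg__accuracy style) weights
# every group equally:
print(group_accuracy.mean())  # 0.75

# Averaging over all samples (the per_sample style) weights groups by size:
print((group_accuracy * group_n_samples).sum() / group_n_samples.sum())  # ~0.873
```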
4 changes: 2 additions & 2 deletions examples/parameter_optimization/_03_gridsearch_cv.py
@@ -160,12 +160,12 @@ def score(pipeline: MyPipeline, datapoint: ECGExampleData) -> dict[str, float]:
# The mean score is the primary parameter used to select the best parameter combination (if `return_optimized` is True).
# All other performance values are just there to provide further insight.

- results_df[["mean_test_precision", "mean_test_recall", "mean_test_f1_score"]]
+ results_df.filter(like="mean__test__agg__")

# %%
# For even more insight, you can inspect the scores per datapoint:

- results_df.filter(like="test_single")
+ results_df.filter(like="test__single__")

# %%
# If `return_optimized` was set to True (or the name of a score), a final optimization is performed using the best
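Note that `filter(like=...)` is a plain substring match on the column names; the consistently delimited prefixes are what make it reliable. A self-contained sketch with made-up result columns:

```python
import pandas as pd

# Made-up grid-search-style results using the new naming convention.
results_df = pd.DataFrame(
    {
        "mean__test__agg__precision": [0.81, 0.75],
        "mean__test__agg__recall": [0.64, 0.90],
        "test__single__precision": [[0.80, 0.82], [0.70, 0.80]],
    }
)

# `like` keeps every column whose name contains the given substring.
print(results_df.filter(like="mean__test__agg__").columns.to_list())
# ['mean__test__agg__precision', 'mean__test__agg__recall']
print(results_df.filter(like="test__single__").columns.to_list())
# ['test__single__precision']
```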
18 changes: 15 additions & 3 deletions examples/validation/_01_validation.py
@@ -96,23 +96,35 @@ def score(pipeline: MyPipeline, datapoint: ECGExampleData):
# To simplify things a little, we will split the output into three parts:
#
# The main output are the means of the performance values over all datapoints.
+ # They are all prefixed with `agg__` to make it easy to filter them out within the results.
+ # Note that if you want to use different aggregation methods, you can create and pass a custom scorer to
+ # :func:`~tpcp.validate.validate`. See the example on :ref:`custom scorers <custom_scorer>` for further details.
- performance = result_df[["precision", "recall", "f1_score"]]
+ performance = result_df.filter(like="agg__")
performance

# %%
# If you need more insight into the results, you can inspect the
# individual score for each data point given in a list. In this example, we had 12 data points.
# Thus, we retrieve 12 values for each score.
+ # These values are all prefixed with `single__`.
# Inspecting this list can help to identify potential issues with certain parts of your dataset.
# To link the performance values to a specific datapoint, you can look at the `data_labels` field.
- single_performance = result_df[["single_precision", "single_recall", "single_f1_score", "data_labels"]]
+ single_performance = result_df.filter(like="single__")
single_performance

# %%
+ # It is often quite handy to explode this dataframe and combine it with the data labels.
+ # This way, you can easily identify the datapoints that are causing issues.
+ exploded_results = (
+     single_performance.explode(single_performance.columns.to_list())
+     .rename_axis("fold")
+     .set_index(result_df["data_labels"].explode(), append=True)
+ )
+ exploded_results

# %%
# The final level of debug information is provided via the timings.
- timings = result_df[["score_time"]]
+ timings = result_df.filter(like="debug__")
timings

# %%
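The explode-and-reindex pattern introduced above is easiest to follow on a toy frame (made-up scores and labels; the real output has one list entry per datapoint):

```python
import pandas as pd

# Toy stand-in for the validate() output: list-valued score columns in a single row.
result_df = pd.DataFrame(
    {
        "single__precision": [[0.9, 0.7, 0.8]],
        "single__recall": [[0.8, 0.6, 0.9]],
        "data_labels": [["p01", "p02", "p03"]],
    }
)

single_performance = result_df.filter(like="single__")

# One row per datapoint, indexed by fold and data label.
exploded = (
    single_performance.explode(single_performance.columns.to_list())
    .rename_axis("fold")
    .set_index(result_df["data_labels"].explode(), append=True)
)
print(exploded)
```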
36 changes: 18 additions & 18 deletions examples/validation/_02_cross_validation.py
Expand Up @@ -129,8 +129,10 @@ def score(pipeline: MyPipeline, datapoint: ECGExampleData):
# To simplify things a little, we will split the output into four parts:
#
# The main output are the test set performance values.
+ # They are aggregated over all datapoints in each fold using the aggregation specified in the `scoring` function.
+ # They are all prefixed with ``test__agg__`` to make them easy to filter out within the results.
# Each row corresponds to the performance in the respective fold.
- performance = result_df[["test_precision", "test_recall", "test_f1_score"]]
+ performance = result_df.filter(like="test__agg__")
performance

# %%
@@ -146,34 +148,32 @@ def score(pipeline: MyPipeline, datapoint: ECGExampleData):
# In this example this is only a list with a single element per score, as we only had a single datapoint per fold.
# In a real scenario, this will be a list of all datapoints.
# Inspecting this list can help to identify potential issues with certain parts of your dataset.
- # To link the performance values to a specific datapoint, you can look at the `test_data_labels` field.
- single_performance = result_df[
-     ["test_single_precision", "test_single_recall", "test_single_f1_score", "test_data_labels"]
- ]
+ single_performance = result_df.filter(like="test__single__")
single_performance

# %%
- # Even further insight is provided by the train results (if activated in parameters).
+ # To link the performance values to a specific datapoint, you can look at the `test__data_labels` field.
+ # It is often quite handy to combine all the results into one dataframe:
+ exploded_results = (
+     single_performance.explode(single_performance.columns.to_list())
+     .rename_axis("fold")
+     .set_index(result_df["test__data_labels"].explode(), append=True)
+ )
+ exploded_results
+
+
+ # %%
+ # Even further insight is provided by the train results (if activated via ``return_train_score``).
# These are the performance results on the train set and can indicate if the training provided meaningful results and
# can also indicate over-fitting, if the performance on the test set is much worse than the performance on the train
# set.
- train_performance = result_df[
-     [
-         "train_precision",
-         "train_recall",
-         "train_f1_score",
-         "train_single_precision",
-         "train_single_recall",
-         "train_single_f1_score",
-         "train_data_labels",
-     ]
- ]
+ train_performance = result_df.filter(like="train__")
train_performance

# %%
# The final level of debug information is provided via the timings (note the long runtime in fold 0 can be explained
# by the jit-compiler used in `BarthDtw`) ...
- timings = result_df[["score_time", "optimize_time"]]
+ timings = result_df.filter(like="debug__")
timings

# %%
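The matching prefixes also make the train/test comparison for spotting over-fitting mechanical. A hedged sketch with toy numbers; the `train__agg__` prefix is assumed by analogy with `test__agg__`, so check it against your actual columns:

```python
import pandas as pd

# Toy per-fold aggregates; train__agg__ is an assumed prefix (by analogy with
# test__agg__), not verified against the library.
result_df = pd.DataFrame(
    {
        "train__agg__f1_score": [0.95, 0.93],
        "test__agg__f1_score": [0.80, 0.85],
    }
)

train = result_df.filter(like="train__agg__").rename(columns=lambda c: c.removeprefix("train__agg__"))
test = result_df.filter(like="test__agg__").rename(columns=lambda c: c.removeprefix("test__agg__"))

# A consistently large positive gap suggests over-fitting.
print(train - test)
```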
6 changes: 3 additions & 3 deletions examples/validation/_04_advanced_cross_validation.py
@@ -108,7 +108,7 @@ def score(pipeline: MyPipeline, datapoint: ECGExampleData):

# %%
# We can see that the test data of the first fold contains only participants from group 1.
- result_df["test_data_labels"].explode()
+ result_df["test__data_labels"].explode()

# %%
# This works fine when the groups are just "additional information", and are unlikely to affect the data within.
@@ -132,7 +132,7 @@ def score(pipeline: MyPipeline, datapoint: ECGExampleData):

results = cross_validate(optimizable_pipe, data_imbalanced, scoring=score, cv=cv)
result_df_stratified = pd.DataFrame(results)
- result_df_stratified["test_data_labels"].explode()
+ result_df_stratified["test__data_labels"].explode()

# %%
# Now we can see that the groups are balanced in each fold and both folds get one of the remaining group 1 participants.
@@ -155,7 +155,7 @@ def score(pipeline: MyPipeline, datapoint: ECGExampleData):

results = cross_validate(optimizable_pipe, example_data, scoring=score, cv=cv)
result_df_grouped = pd.DataFrame(results)
- result_df_grouped["test_data_labels"].explode()
+ result_df_grouped["test__data_labels"].explode()

# %%
# We can see that this forces the creation of unequal-sized splits to ensure that the groups are kept together.
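The keep-groups-together behavior shown here mirrors scikit-learn's `GroupKFold`. A standalone sketch with toy data (not the ECG example) showing how intact groups force unequal split sizes:

```python
import numpy as np
from sklearn.model_selection import GroupKFold

# Toy data: 10 samples from 3 groups of unequal size.
X = np.arange(10).reshape(-1, 1)
groups = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3, 3])

# GroupKFold never splits a group across train and test, so the test folds
# end up with unequal sizes.
for train_idx, test_idx in GroupKFold(n_splits=3).split(X, groups=groups):
    print(len(test_idx), groups[test_idx])
```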
50 changes: 25 additions & 25 deletions tests/test_examples/snapshot/test_advanced_cross_validate_0.json
@@ -10,7 +10,7 @@
"type":"integer"
},
{
"name":"test_data_labels",
"name":"test__data_labels",
"type":"string"
}
],
@@ -23,122 +23,122 @@
{
"index":0,
"fold_id":0,
"test_data_labels":"group_1"
"test__data_labels":"group_1"
},
{
"index":1,
"fold_id":0,
"test_data_labels":"100"
"test__data_labels":"100"
},
{
"index":2,
"fold_id":0,
"test_data_labels":"group_3"
"test__data_labels":"group_3"
},
{
"index":3,
"fold_id":0,
"test_data_labels":"104"
"test__data_labels":"104"
},
{
"index":4,
"fold_id":0,
"test_data_labels":"group_1"
"test__data_labels":"group_1"
},
{
"index":5,
"fold_id":0,
"test_data_labels":"105"
"test__data_labels":"105"
},
{
"index":6,
"fold_id":0,
"test_data_labels":"group_3"
"test__data_labels":"group_3"
},
{
"index":7,
"fold_id":0,
"test_data_labels":"108"
"test__data_labels":"108"
},
{
"index":8,
"fold_id":0,
"test_data_labels":"group_1"
"test__data_labels":"group_1"
},
{
"index":9,
"fold_id":0,
"test_data_labels":"114"
"test__data_labels":"114"
},
{
"index":10,
"fold_id":0,
"test_data_labels":"group_3"
"test__data_labels":"group_3"
},
{
"index":11,
"fold_id":0,
"test_data_labels":"119"
"test__data_labels":"119"
},
{
"index":12,
"fold_id":0,
"test_data_labels":"group_1"
"test__data_labels":"group_1"
},
{
"index":13,
"fold_id":0,
"test_data_labels":"121"
"test__data_labels":"121"
},
{
"index":14,
"fold_id":0,
"test_data_labels":"group_3"
"test__data_labels":"group_3"
},
{
"index":15,
"fold_id":0,
"test_data_labels":"200"
"test__data_labels":"200"
},
{
"index":16,
"fold_id":1,
"test_data_labels":"group_2"
"test__data_labels":"group_2"
},
{
"index":17,
"fold_id":1,
"test_data_labels":"102"
"test__data_labels":"102"
},
{
"index":18,
"fold_id":1,
"test_data_labels":"group_2"
"test__data_labels":"group_2"
},
{
"index":19,
"fold_id":1,
"test_data_labels":"106"
"test__data_labels":"106"
},
{
"index":20,
"fold_id":1,
"test_data_labels":"group_2"
"test__data_labels":"group_2"
},
{
"index":21,
"fold_id":1,
"test_data_labels":"116"
"test__data_labels":"116"
},
{
"index":22,
"fold_id":1,
"test_data_labels":"group_2"
"test__data_labels":"group_2"
},
{
"index":23,
"fold_id":1,
"test_data_labels":"123"
"test__data_labels":"123"
}
]
}