More accurate names for cv and Gridsearch results #117

Merged: 7 commits, Jul 2, 2024
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -13,6 +13,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
also removes the need to forward the `mock_label` and `group_label` arguments to the underlying optimizer.
The use of the `mock_label` and `group_label` arguments has been removed without deprecation.
(https://github.com/mad-lab-fau/tpcp/pull/114)
+ - All classes and methods that produce "grid-search"- or "cross-validate"-like output (`GridSearch`, `GridSearchCv`, `cross_validate`, `validate`)
+   have updated names for all their output attributes.
+   In most cases, the naming has switched from a single underscore to a double underscore to separate the different
+   parts of an output name, making it easier to access the outputs programmatically.

## [0.34.1] - 2024-07-02

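The motivation for the double underscore is easiest to see in code. A minimal sketch (hypothetical result names following the new convention, not code from this PR):

```python
# Hypothetical result names following the new double-underscore convention.
new_style = ["test__agg__f1_score", "test__single__f1_score", "test__data_labels"]

# Splitting on "__" recovers the parts of each name unambiguously ...
print([tuple(name.split("__")) for name in new_style])
# [('test', 'agg', 'f1_score'), ('test', 'single', 'f1_score'), ('test', 'data_labels')]

# ... while the old single-underscore names could not be split programmatically
# without also breaking apart multi-word score names:
print("test_single_f1_score".split("_"))  # ['test', 'single', 'f1', 'score']
```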
14 changes: 9 additions & 5 deletions examples/integrations/_01_tensorflow.py
@@ -162,8 +162,8 @@ def predicted_labels_(self):

@make_optimize_safe
def self_optimize(self, dataset, **_) -> Self:
- data = np.vstack([d.input_as_array() for d in dataset])
- labels = np.hstack([d.labels_as_array() for d in dataset])
+ data = tf.convert_to_tensor(np.vstack([d.input_as_array() for d in dataset]))
+ labels = tf.convert_to_tensor(np.hstack([d.labels_as_array() for d in dataset]))

print(data.shape)
if self._model is not None:
@@ -192,7 +192,7 @@ def self_optimize(self, dataset, **_) -> Self:
def run(self, datapoint) -> Self:
if self._model is None:
raise RuntimeError("Model not trained yet!")
- data = datapoint.input_as_array()
+ data = tf.convert_to_tensor(datapoint.input_as_array())

self.predictions_ = self._model.predict(data)
return self
@@ -262,8 +262,12 @@ def scoring(pipeline, datapoint):

# %%
# We can now look at the results per group:
- cv_results["test_single_accuracy"]
+ cv_results["test__single__accuracy"]

+ # %%
+ # Average first per group and then over all groups:
+ cv_results["test__agg__accuracy"]
+
# %%
# And the overall accuracy as the average over all samples of all groups within a fold:
- cv_results["test_per_sample__accuracy"]
+ cv_results["test__agg__per_sample__accuracy"]
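The two aggregates above differ in how they weight groups. A small numeric sketch with toy numbers (not from the example run):

```python
import numpy as np

# Toy numbers: accuracies and sizes of two groups within one fold.
group_accuracy = np.array([0.90, 0.60])
group_n_samples = np.array([100, 10])

# Averaging per-group scores first (the test__agg__accuracy style) weights
# every group equally:
print(group_accuracy.mean())  # 0.75

# Averaging over all samples (the per_sample style) weights groups by size:
print((group_accuracy * group_n_samples).sum() / group_n_samples.sum())  # ~0.873
```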
4 changes: 2 additions & 2 deletions examples/parameter_optimization/_03_gridsearch_cv.py
@@ -160,12 +160,12 @@ def score(pipeline: MyPipeline, datapoint: ECGExampleData) -> dict[str, float]:
# The mean score is the primary parameter used to select the best parameter combination (if `return_optimized` is True).
# All other performance values are just there to provide further insight.

- results_df[["mean_test_precision", "mean_test_recall", "mean_test_f1_score"]]
+ results_df.filter(like="mean__test__agg__")

# %%
# For even more insight, you can inspect the scores per datapoint:

- results_df.filter(like="test_single")
+ results_df.filter(like="test__single__")

# %%
# If `return_optimized` was set to True (or the name of a score), a final optimization is performed using the best
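Note that `filter(like=...)` is a plain substring match on the column names; the consistently delimited prefixes are what make it reliable. A self-contained sketch with made-up result columns:

```python
import pandas as pd

# Made-up grid-search-style results using the new naming convention.
results_df = pd.DataFrame(
    {
        "mean__test__agg__precision": [0.81, 0.75],
        "mean__test__agg__recall": [0.64, 0.90],
        "test__single__precision": [[0.80, 0.82], [0.70, 0.80]],
    }
)

# `like` keeps every column whose name contains the given substring.
print(results_df.filter(like="mean__test__agg__").columns.to_list())
# ['mean__test__agg__precision', 'mean__test__agg__recall']
print(results_df.filter(like="test__single__").columns.to_list())
# ['test__single__precision']
```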
18 changes: 15 additions & 3 deletions examples/validation/_01_validation.py
@@ -96,23 +96,35 @@ def score(pipeline: MyPipeline, datapoint: ECGExampleData):
# To simplify things a little, we will split the output into three parts:
#
# The main output are the means of the performance values over all datapoints.
+ # They are all prefixed with `agg__` to make it easy to filter them out within the results.
+ # Note that if you want to use different aggregation methods, you can create and pass a custom scorer to
+ # :func:`~tpcp.validate.validate`. See the example on :ref:`custom scorers <custom_scorer>` for further details.
- performance = result_df[["precision", "recall", "f1_score"]]
+ performance = result_df.filter(like="agg__")
performance

# %%
# If you need more insight into the results, you can inspect the
# individual score for each data point given in a list. In this example, we had 12 data points.
# Thus, we retrieve 12 values for each score.
+ # These values are all prefixed with `single__`.
# Inspecting this list can help to identify potential issues with certain parts of your dataset.
# To link the performance values to a specific datapoint, you can look at the `data_labels` field.
- single_performance = result_df[["single_precision", "single_recall", "single_f1_score", "data_labels"]]
+ single_performance = result_df.filter(like="single__")
single_performance

# %%
+ # It is often quite handy to explode this dataframe and combine it with the data labels.
+ # This way, you can easily identify the datapoints that are causing issues.
+ exploded_results = (
+     single_performance.explode(single_performance.columns.to_list())
+     .rename_axis("fold")
+     .set_index(result_df["data_labels"].explode(), append=True)
+ )
+ exploded_results

# %%
# The final level of debug information is provided via the timings.
- timings = result_df[["score_time"]]
+ timings = result_df.filter(like="debug__")
timings

# %%
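The explode-and-reindex pattern introduced above is easiest to follow on a toy frame (made-up scores and labels; the real output has one list entry per datapoint):

```python
import pandas as pd

# Toy stand-in for the validate() output: list-valued score columns in a single row.
result_df = pd.DataFrame(
    {
        "single__precision": [[0.9, 0.7, 0.8]],
        "single__recall": [[0.8, 0.6, 0.9]],
        "data_labels": [["p01", "p02", "p03"]],
    }
)

single_performance = result_df.filter(like="single__")

# One row per datapoint, indexed by fold and data label.
exploded = (
    single_performance.explode(single_performance.columns.to_list())
    .rename_axis("fold")
    .set_index(result_df["data_labels"].explode(), append=True)
)
print(exploded)
```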
36 changes: 18 additions & 18 deletions examples/validation/_02_cross_validation.py
Expand Up @@ -129,8 +129,10 @@ def score(pipeline: MyPipeline, datapoint: ECGExampleData):
# To simplify things a little, we will split the output into four parts:
#
# The main output are the test set performance values.
+ # They are aggregated over all datapoints in each fold using the aggregation specified in the `scoring` function.
+ # They are all prefixed with ``test__agg__`` to make them easy to filter out within the results.
# Each row corresponds to the performance in the respective fold.
- performance = result_df[["test_precision", "test_recall", "test_f1_score"]]
+ performance = result_df.filter(like="test__agg__")
performance

# %%
@@ -146,34 +148,32 @@ def score(pipeline: MyPipeline, datapoint: ECGExampleData):
# In this example this is only a list with a single element per score, as we only had a single datapoint per fold.
# In a real scenario, this will be a list of all datapoints.
# Inspecting this list can help to identify potential issues with certain parts of your dataset.
- # To link the performance values to a specific datapoint, you can look at the `test_data_labels` field.
- single_performance = result_df[
-     ["test_single_precision", "test_single_recall", "test_single_f1_score", "test_data_labels"]
- ]
+ single_performance = result_df.filter(like="test__single__")
single_performance

# %%
- # Even further insight is provided by the train results (if activated in parameters).
+ # To link the performance values to a specific datapoint, you can look at the `test__data_labels` field.
+ # It is often quite handy to combine all the results into one dataframe:
+ exploded_results = (
+     single_performance.explode(single_performance.columns.to_list())
+     .rename_axis("fold")
+     .set_index(result_df["test__data_labels"].explode(), append=True)
+ )
+ exploded_results
+
+
+ # %%
+ # Even further insight is provided by the train results (if activated via ``return_train_score``).
# These are the performance results on the train set and can indicate if the training provided meaningful results and
# can also indicate over-fitting, if the performance on the test set is much worse than the performance on the train
# set.
- train_performance = result_df[
-     [
-         "train_precision",
-         "train_recall",
-         "train_f1_score",
-         "train_single_precision",
-         "train_single_recall",
-         "train_single_f1_score",
-         "train_data_labels",
-     ]
- ]
+ train_performance = result_df.filter(like="train__")
train_performance

# %%
# The final level of debug information is provided via the timings (note the long runtime in fold 0 can be explained
# by the jit-compiler used in `BarthDtw`) ...
- timings = result_df[["score_time", "optimize_time"]]
+ timings = result_df.filter(like="debug__")
timings

# %%
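The matching prefixes also make the train/test comparison for spotting over-fitting mechanical. A hedged sketch with toy numbers; the `train__agg__` prefix is assumed by analogy with `test__agg__`, so check it against your actual columns:

```python
import pandas as pd

# Toy per-fold aggregates; train__agg__ is an assumed prefix (by analogy with
# test__agg__), not verified against the library.
result_df = pd.DataFrame(
    {
        "train__agg__f1_score": [0.95, 0.93],
        "test__agg__f1_score": [0.80, 0.85],
    }
)

train = result_df.filter(like="train__agg__").rename(columns=lambda c: c.removeprefix("train__agg__"))
test = result_df.filter(like="test__agg__").rename(columns=lambda c: c.removeprefix("test__agg__"))

# A consistently large positive gap suggests over-fitting.
print(train - test)
```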
6 changes: 3 additions & 3 deletions examples/validation/_04_advanced_cross_validation.py
@@ -108,7 +108,7 @@ def score(pipeline: MyPipeline, datapoint: ECGExampleData):

# %%
# We can see that the test data of the first fold contains only participants from group 1.
- result_df["test_data_labels"].explode()
+ result_df["test__data_labels"].explode()

# %%
# This works fine when the groups are just "additional information", and are unlikely to affect the data within.
@@ -132,7 +132,7 @@ def score(pipeline: MyPipeline, datapoint: ECGExampleData):

results = cross_validate(optimizable_pipe, data_imbalanced, scoring=score, cv=cv)
result_df_stratified = pd.DataFrame(results)
- result_df_stratified["test_data_labels"].explode()
+ result_df_stratified["test__data_labels"].explode()

# %%
# Now we can see that the groups are balanced in each fold and both folds get one of the remaining group 1 participants.
@@ -155,7 +155,7 @@ def score(pipeline: MyPipeline, datapoint: ECGExampleData):

results = cross_validate(optimizable_pipe, example_data, scoring=score, cv=cv)
result_df_grouped = pd.DataFrame(results)
- result_df_grouped["test_data_labels"].explode()
+ result_df_grouped["test__data_labels"].explode()

# %%
# We can see that this forces the creation of unequal-sized splits to ensure that the groups are kept together.
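The keep-groups-together behavior shown here mirrors scikit-learn's `GroupKFold`. A standalone sketch with toy data (not the ECG example) showing how intact groups force unequal split sizes:

```python
import numpy as np
from sklearn.model_selection import GroupKFold

# Toy data: 10 samples from 3 groups of unequal size.
X = np.arange(10).reshape(-1, 1)
groups = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3, 3])

# GroupKFold never splits a group across train and test, so the test folds
# end up with unequal sizes.
for train_idx, test_idx in GroupKFold(n_splits=3).split(X, groups=groups):
    print(len(test_idx), groups[test_idx])
```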
50 changes: 25 additions & 25 deletions tests/test_examples/snapshot/test_advanced_cross_validate_0.json
@@ -10,7 +10,7 @@
"type":"integer"
},
{
"name":"test_data_labels",
"name":"test__data_labels",
"type":"string"
}
],
@@ -23,122 +23,122 @@
{
"index":0,
"fold_id":0,
"test_data_labels":"group_1"
"test__data_labels":"group_1"
},
{
"index":1,
"fold_id":0,
"test_data_labels":"100"
"test__data_labels":"100"
},
{
"index":2,
"fold_id":0,
"test_data_labels":"group_3"
"test__data_labels":"group_3"
},
{
"index":3,
"fold_id":0,
"test_data_labels":"104"
"test__data_labels":"104"
},
{
"index":4,
"fold_id":0,
"test_data_labels":"group_1"
"test__data_labels":"group_1"
},
{
"index":5,
"fold_id":0,
"test_data_labels":"105"
"test__data_labels":"105"
},
{
"index":6,
"fold_id":0,
"test_data_labels":"group_3"
"test__data_labels":"group_3"
},
{
"index":7,
"fold_id":0,
"test_data_labels":"108"
"test__data_labels":"108"
},
{
"index":8,
"fold_id":0,
"test_data_labels":"group_1"
"test__data_labels":"group_1"
},
{
"index":9,
"fold_id":0,
"test_data_labels":"114"
"test__data_labels":"114"
},
{
"index":10,
"fold_id":0,
"test_data_labels":"group_3"
"test__data_labels":"group_3"
},
{
"index":11,
"fold_id":0,
"test_data_labels":"119"
"test__data_labels":"119"
},
{
"index":12,
"fold_id":0,
"test_data_labels":"group_1"
"test__data_labels":"group_1"
},
{
"index":13,
"fold_id":0,
"test_data_labels":"121"
"test__data_labels":"121"
},
{
"index":14,
"fold_id":0,
"test_data_labels":"group_3"
"test__data_labels":"group_3"
},
{
"index":15,
"fold_id":0,
"test_data_labels":"200"
"test__data_labels":"200"
},
{
"index":16,
"fold_id":1,
"test_data_labels":"group_2"
"test__data_labels":"group_2"
},
{
"index":17,
"fold_id":1,
"test_data_labels":"102"
"test__data_labels":"102"
},
{
"index":18,
"fold_id":1,
"test_data_labels":"group_2"
"test__data_labels":"group_2"
},
{
"index":19,
"fold_id":1,
"test_data_labels":"106"
"test__data_labels":"106"
},
{
"index":20,
"fold_id":1,
"test_data_labels":"group_2"
"test__data_labels":"group_2"
},
{
"index":21,
"fold_id":1,
"test_data_labels":"116"
"test__data_labels":"116"
},
{
"index":22,
"fold_id":1,
"test_data_labels":"group_2"
"test__data_labels":"group_2"
},
{
"index":23,
"fold_id":1,
"test_data_labels":"123"
"test__data_labels":"123"
}
]
}