|
| 1 | +""" |
| 2 | +==================================================================================== |
| 3 | +Use MAPIE to control risk of a binary classifier with multiple prediction parameters |
| 4 | +==================================================================================== |
| 5 | +
|
| 6 | +AI is a powerful tool for email sorting (for example between spam and urgent emails). |
| 7 | +However, because algorithms are not perfect, manual verification is sometimes required. |
| 8 | +Thus one would like to be able to control the amount of emails sent to human validation. |
| 9 | +One way to do so is to define a multi-parameter prediction function based on a |
| 10 | +classifier's predicted scores. This would allow defining a rule for email checking, |
| 11 | +which could be adapted by varying the prediction parameters. |
| 12 | +
|
| 13 | +In this example, we explain how to do risk control for binary classification relying |
| 14 | +on multiple prediction parameters with MAPIE. |
| 15 | +
|
| 16 | +""" |
| 17 | + |
| 18 | +import matplotlib.pyplot as plt |
| 19 | +import numpy as np |
| 20 | +from sklearn.datasets import make_circles |
| 21 | +from sklearn.neural_network import MLPClassifier |
| 22 | + |
| 23 | +from mapie.risk_control import BinaryClassificationController, BinaryClassificationRisk |
| 24 | +from mapie.utils import train_conformalize_test_split |
| 25 | + |
| 26 | +RANDOM_STATE = 1 |
| 27 | + |
| 28 | +############################################################################## |
| 29 | +# First, load the dataset and then split it into training, calibration, |
| 30 | +# and test sets. |
| 31 | + |
# Two noisy concentric circles form the binary classification problem.
X, y = make_circles(
    n_samples=5000,
    noise=0.3,
    factor=0.3,
    random_state=RANDOM_STATE,
)

# 80 % of the samples go to training; calibration and test get 10 % each.
split_sizes = {"train_size": 0.8, "conformalize_size": 0.1, "test_size": 0.1}
X_train, X_calib, X_test, y_train, y_calib, y_test = train_conformalize_test_split(
    X, y, random_state=RANDOM_STATE, **split_sizes
)
| 41 | + |
| 42 | +# Plot the three datasets to visualize the distribution of the two classes. We can |
| 43 | +# assume that the feature space represents some embedding of emails. |
fig, axes = plt.subplots(1, 3, figsize=(18, 6))

# One entry per panel: its title and the (features, labels) pair to draw.
panels = {
    "Training Data": (X_train, y_train),
    "Calibration Data": (X_calib, y_calib),
    "Test Data": (X_test, y_test),
}
# Drawn in this order so that class 1 is plotted on top of class 0.
class_styles = [
    (0, "tab:blue", '"negative" class'),
    (1, "tab:red", '"positive" class'),
]

for panel_idx, (ax, (title, (X_data, y_data))) in enumerate(
    zip(axes, panels.items())
):
    for class_value, color, class_label in class_styles:
        in_class = y_data == class_value
        ax.scatter(
            X_data[in_class, 0],
            X_data[in_class, 1],
            edgecolors="k",
            c=color,
            label=class_label,
            alpha=0.5,
        )
    ax.set_title(title, fontsize=18)
    ax.set_xlabel("Feature 1", fontsize=16)
    ax.tick_params(labelsize=14)

    # Only the left-most panel keeps its y-axis label and ticks.
    if panel_idx > 0:
        ax.set_ylabel("")
        ax.set_yticks([])
    else:
        ax.set_ylabel("Feature 2", fontsize=16)

# The class labels are the same in every panel, so a single figure-level
# legend built from the first panel is enough.
legend_handles, legend_labels = axes[0].get_legend_handles_labels()
fig.legend(
    legend_handles,
    legend_labels,
    loc="lower center",
    bbox_to_anchor=(0.5, -0.01),
    ncol=2,
    fontsize=16,
)

plt.suptitle("Visualization of Train, Calibration, and Test Sets", fontsize=22)
# Reserve room at the bottom for the legend and at the top for the suptitle.
plt.tight_layout(rect=[0, 0.05, 1, 0.95])
plt.show()
| 88 | + |
| 89 | +############################################################################## |
| 90 | +# Second, fit a Multi-layer Perceptron classifier on the training data. |
| 91 | + |
# ``fit`` returns the estimator itself, so construction and training chain.
clf = MLPClassifier(max_iter=150, random_state=RANDOM_STATE).fit(X_train, y_train)
| 94 | + |
| 95 | + |
| 96 | +############################################################################# |
| 97 | +# Third, define a multi-parameter prediction function. For an email to be sent |
| 98 | +# to human verification, we want the predicted score of the positive class to be |
| 99 | +# between two thresholds `lambda_1` and `lambda_2`. High (respectively low) values of |
| 100 | +# the score correspond to high confidence that the email is spam (respectively not spam). |
| 101 | +# Therefore, emails with intermediate scores are the ones for which the classifier |
| 102 | +# is the least certain, and we want these emails to be verified by a human. |
def send_to_human(X, lambda_1, lambda_2):
    """Flag samples whose positive-class score lies in ``[lambda_1, lambda_2)``.

    Parameters
    ----------
    X
        Samples, passed unchanged to ``clf.predict_proba``.
    lambda_1
        Inclusive lower bound on the positive-class score.
    lambda_2
        Exclusive upper bound on the positive-class score.

    Returns
    -------
    Boolean mask that is ``True`` for every sample whose score falls inside
    the band, i.e. every email to be sent to human verification.
    """
    positive_score = clf.predict_proba(X)[:, 1]
    above_lower = positive_score >= lambda_1
    below_upper = positive_score < lambda_2
    return above_lower & below_upper
| 106 | + |
| 107 | + |
| 108 | +############################################################################# |
| 109 | +# From the previous function, we know we have a constraint |
| 110 | +# `lambda_1` <= `lambda_2`. We can generate a set of values to explore respecting |
| 111 | +# this constraint. |
| 112 | + |
# Candidate lower thresholds (0.1 ... 0.6) and band widths (0.1 ... 0.5).
lower_bounds = [k / 10 for k in range(1, 7)]
widths = [k / 10 for k in range(1, 6)]

# Keep every (lambda_1, lambda_2) pair whose upper threshold stays at or
# below 0.99. Widths are increasing, so filtering is equivalent to stopping
# at the first upper threshold that is too large.
to_explore = np.array(
    [
        (lower, lower + width)
        for lower in lower_bounds
        for width in widths
        if lower + width <= 0.99
    ]
)
| 122 | + |
| 123 | +############################################################################# |
| 124 | +# Because we want to control the proportion of emails to be verified by a human, |
| 125 | +# we need to define a specific :class:`BinaryClassificationRisk` which represents |
| 126 | +# the fraction of samples predicted as positive (i.e., sent to human verification). |
| 127 | + |
def _predicted_positive(y_true, y_pred):
    """Risk occurrence: the prediction itself (positive = sent to a human)."""
    return y_pred


def _every_sample(y_true, y_pred):
    """Risk condition: always satisfied, so every sample is taken into account."""
    return True


# Lower is better: the goal is to bound the share of emails sent to a human.
prop_positive = BinaryClassificationRisk(
    higher_is_better=False,
    risk_condition=_every_sample,
    risk_occurrence=_predicted_positive,
)
| 133 | + |
| 134 | +############################################################################## |
| 135 | +# Finally, we initialize a :class:`~mapie.risk_control.BinaryClassificationController` |
| 136 | +# using our custom function ``send_to_human``, our custom risk ``prop_positive``, |
| 137 | +# a target risk level (0.2), and a confidence level (0.9). Then we use the calibration |
| 138 | +# data to compute statistically guaranteed thresholds using a multi-parameter control |
| 139 | +# method. |
| 140 | + |
confidence_level = 0.9
target_level = 0.2

# Gather the controller configuration in one place before building it.
controller_settings = dict(
    predict_function=send_to_human,
    risk=prop_positive,
    target_level=target_level,
    confidence_level=confidence_level,
    best_predict_param_choice="precision",
    list_predict_params=to_explore,
)
bcc = BinaryClassificationController(**controller_settings)
bcc.calibrate(X_calib, y_calib)

# Report how many candidate (lambda_1, lambda_2) pairs were kept as valid.
n_valid = len(bcc.valid_predict_params)
summary = (
    f"{n_valid} multi-dimensional parameters "
    f"found that guarantee a proportion of emails sent to verification\n"
    f"of at most {target_level} with a confidence of {confidence_level}."
)
print(summary)
| 159 | + |
| 160 | +####################################################################### |
# Mark every valid (lambda_1, lambda_2) pair on a 10 x 10 grid in which cell
# (row, col) stands for lambda_1 = row / 10 and lambda_2 = col / 10.
matrix = np.zeros((10, 10))
for valid_params in bcc.valid_predict_params:
    # Round instead of truncating: the thresholds are floating-point sums
    # (for instance 0.6 + 0.3 evaluates to 0.8999999999999999), so a bare
    # int() could select the neighbouring cell.
    row = int(round(valid_params[0] * 10))
    col = int(round(valid_params[1] * 10))
    matrix[row, col] = 1

fig, ax = plt.subplots(figsize=(6, 6))
im = ax.imshow(matrix, cmap="inferno")
# Label the ticks with the threshold values (0.0 ... 0.9) rather than indices.
ax.set_xticks(range(10), labels=(np.array(range(10)) / 10))
ax.set_yticks(range(10), labels=(np.array(range(10)) / 10))
ax.set_xlabel(r"lambda_2")
ax.set_ylabel(r"lambda_1")
ax.set_title("Valid parameters")
fig.tight_layout()
plt.show()
0 commit comments