@@ -2,14 +2,11 @@
 ===============================================================================
 Comparing different split criteria for random forest regression on toy datasets
 ===============================================================================
-
 An example to compare the different split criteria available for
 :class:`sklearn.ensemble.RandomForestRegressor`.
-
 Metrics used to evaluate these splitters include Mean Squared Error (MSE), a
 measure of distance between the true target (`y_true`) and the predicted output
 (`y_pred`), and runtime.
-
 For visual examples of these datasets, see
 :ref:`sphx_glr_auto_examples_datasets_plot_nonlinear_regression_datasets.py`.
 """
@@ -19,7 +16,7 @@
 
 import time
 from itertools import product
-from multiprocessing import Pool
+from joblib import Parallel, delayed
 
 import matplotlib.pyplot as plt
 import numpy as np
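
Editor's note (not part of the diff): at its core, the comparison fits a RandomForestRegressor per split criterion and scores it with mean squared error plus runtime on held-out data. A minimal, self-contained sketch of that loop, using made-up random data and the criterion names this example targets (newer scikit-learn releases spell them "squared_error" and "absolute_error"):

import time
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

rng = np.random.RandomState(0)
X_train, X_test = rng.uniform(size=(60, 10)), rng.uniform(size=(60, 10))
y_train, y_test = rng.uniform(size=60), rng.uniform(size=60)

for criterion in ("mse", "mae", "friedman_mse"):
    start = time.time()
    regr = RandomForestRegressor(criterion=criterion, random_state=0)
    y_pred = regr.fit(X_train, y_train).predict(X_test)
    # Report the two metrics used throughout this example: MSE and runtime.
    print(criterion, mean_squared_error(y_test, y_pred), time.time() - start)
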
@@ -65,7 +62,6 @@ def _test_forest(X, y, regr):
 ###############################################################################
 def main(simulation_name, n_samples, criterion, n_dimensions, n_iter):
     """Measure the performance of RandomForest under simulation conditions.
-
     Parameters
     ----------
     simulation_name : str
@@ -74,12 +70,11 @@ def main(simulation_name, n_samples, criterion, n_dimensions, n_iter):
         Number of training samples.
     criterion : string
         Split criterion used to train forest. Choose from
-        ("mse", "mae", "friedman_mse", "axis", "oblique").
+        ("mse", "mae", "friedman_mse").
     n_dimensions : int
         Number of features and targets to sample.
     n_iter : int
         Which repeat of the same simulation parameter we're on. Ignored.
-
     Returns
     -------
     simulation_name : str
@@ -96,7 +91,7 @@ def main(simulation_name, n_samples, criterion, n_dimensions, n_iter):
     runtime : float
         Runtime (in seconds).
     """
-    print(simulation_name, n_samples)
+    print(simulation_name, n_samples, criterion, n_dimensions, n_iter)
 
     # Get simulation parameters and validation dataset
     sim, noise, (X_test, y_test) = simulations[simulation_name]
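
Editor's note (not part of the diff): the unpacking above implies each entry of `simulations` is a 3-tuple of (simulation callable, noise level, held-out validation data). A purely illustrative sketch of that layout; the key and values below are made up, and the real dict is built in a part of the script this diff does not show:

import numpy as np

X_test = np.random.uniform(size=(100, 10))
y_test = np.random.uniform(size=100)
simulations = {
    "example_sim": (lambda X: X.sum(axis=1),  # assumed: callable producing targets
                    0.1,                      # assumed: noise level
                    (X_test, y_test)),        # held-out validation set
}
sim, noise, (X_test, y_test) = simulations["example_sim"]
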
@@ -133,7 +128,7 @@ def main(simulation_name, n_samples, criterion, n_dimensions, n_iter):
 n_dimensions = 10
 simulation_names = simulations.keys()
 sample_sizes = np.arange(5, 51, 3)
-criteria = ["mae", "mse", "friedman_mse", "axis", "oblique"]
+criteria = ["mae", "mse", "friedman_mse"]
 
 # Number of times to repeat each simulation setting
 n_repeats = 10
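
Editor's note (not part of the diff): `params`, consumed by the parallel call below, is presumably the cross-product of this grid in the argument order `main` expects; the already-imported `itertools.product` suggests a construction along these lines (toy stand-ins replace the script's real values, which are defined outside this diff):

from itertools import product

import numpy as np

simulation_names = ["sim_a", "sim_b"]  # stand-in for simulations.keys()
sample_sizes = np.arange(5, 51, 3)
criteria = ["mae", "mse", "friedman_mse"]
n_dimensions, n_repeats = 10, 10

params = [(sim, n, crit, n_dimensions, i)
          for sim, n, crit, i in product(simulation_names, sample_sizes,
                                         criteria, range(n_repeats))]
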
@@ -161,22 +156,20 @@ def main(simulation_name, n_samples, criterion, n_dimensions, n_iter):
 ###############################################################################
 print("Running simulations...")
 
-with Pool() as pool:
-
-    # Run the simulations in parallel
-    data = pool.starmap(main, params)
-
-    # Save results as a DataFrame
-    columns = ["simulation", "n_samples", "criterion",
-               "n_dimensions", "mse", "runtime"]
-    df = pd.DataFrame(data, columns=columns)
-
-    # Plot the results
-    sns.relplot(x="n_samples",
-                y="mse",
-                hue="criterion",
-                col="simulation",
-                kind="line",
-                data=df,
-                facet_kws={'sharey': False, 'sharex': True})
-    plt.show()
+# Run the simulations in parallel
+data = Parallel(n_jobs=4)(delayed(main)(sim, n, crit, n_dim, n_iter) for sim, n, crit, n_dim, n_iter in params)
+
+# Save results as a DataFrame
+columns = ["simulation", "n_samples", "criterion",
+           "n_dimensions", "mse", "runtime"]
+df = pd.DataFrame(data, columns=columns)
+
+# Plot the results
+sns.relplot(x="n_samples",
+            y="mse",
+            hue="criterion",
+            col="simulation",
+            kind="line",
+            data=df,
+            facet_kws={'sharey': False, 'sharex': True})
+plt.show()
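
Editor's note (not part of the diff): `joblib.Parallel`/`delayed` takes over the role of `pool.starmap` here. Star-unpacking each parameter tuple gives an equivalent, more compact call than naming all five fields; a self-contained sketch with a stand-in worker:

from joblib import Parallel, delayed

def work(name, n, crit, n_dim, i):  # stand-in for main()
    return name, n, crit, n_dim, i

params = [("sim_a", 5, "mse", 10, 0), ("sim_b", 8, "mae", 10, 1)]

# delayed(work)(*p) mirrors the starmap-style unpacking of each tuple.
results = Parallel(n_jobs=2)(delayed(work)(*p) for p in params)
print(results)

Setting n_jobs=1 runs the same code sequentially, which is handy when debugging.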