4343 "Independence" : [make_independent_noise , None ],
4444}
4545
46-
4746###############################################################################
4847def _train_forest (X , y , criterion ):
4948 """Fit a RandomForestRegressor with default parameters and specific criterion."""
@@ -58,24 +57,29 @@ def _test_forest(X, y, regr):
5857 y_pred = regr .predict (X )
5958 return mean_squared_error (y , y_pred )
6059
61- def _prep_data (sim_dict , simulation_name , max_n_samples , n_dimensions ):
60+ def _prep_data (sim_dict , simulation_name , max_n_samples , n_dimensions , n_trials ):
6261 """Generate train and test data for all trials."""
6362 # Get simulation parameters and validation dataset
6463 sim , noise , (X_test , y_test ) = simulations [simulation_name ]
6564 n_samples = int (max_n_samples )
6665 n_dimensions = int (n_dimensions )
6766
68- # Sample training data
69- if noise is not None :
70- X_train , y_train = sim (n_samples = n_samples ,
71- n_dimensions = n_dimensions ,
72- noise = noise ,
73- random_state = random_state )
74- else :
75- X_train , y_train = sim (n_samples = n_samples ,
76- n_dimensions = n_dimensions ,
77- random_state = random_state )
78- sim_dict [simulation_name ] = (X_train , y_train , X_test , y_test )
67+ np .random .seed (random_state )
68+ seeds = np .random .randint (1e8 , size = n_trials )
69+
70+ sim_dict [simulation_name ] = {}
71+ for i in range (n_trials ):
72+ # Sample training data
73+ if noise is not None :
74+ X_train , y_train = sim (n_samples = n_samples ,
75+ n_dimensions = n_dimensions ,
76+ noise = noise ,
77+ random_state = seeds [i ])
78+ else :
79+ X_train , y_train = sim (n_samples = n_samples ,
80+ n_dimensions = n_dimensions ,
81+ random_state = seeds [i ])
82+ sim_dict [simulation_name ][i ] = (np .copy (X_train ), np .copy (y_train ), np .copy (X_test ), np .copy (y_test ))
7983 return sim_dict
8084
8185###############################################################################
@@ -85,8 +89,8 @@ def main(simulation_name, sim_data, n_samples, criterion, n_dimensions, n_iter):
8589 ----------
8690 simulation_name : str
8791 Key from `simulations` dictionary.
88- sim_data: dict
89- Contains X_train, y_train, X_test, and y_test for each simulation_name
92+ sim_data: tuple
93+ Contains X_train, y_train, X_test, and y_test
9094 X_train : np.array #TODO check this
9195 All X training data for given simulation
9296 y_train : np.array # TODO
@@ -178,12 +182,12 @@ def main(simulation_name, sim_data, n_samples, criterion, n_dimensions, n_iter):
178182# Generate training and test data for simulations
179183sim_data = {}
180184for sim in simulation_names :
181- sim_data = _prep_data (sim_data , sim , sample_sizes [- 1 ], n_dimensions )
185+ sim_data = _prep_data (sim_data , sim , sample_sizes [- 1 ], n_dimensions , n_repeats )
182186
183187# Run the simulations in parallel
184188data = Parallel (n_jobs = - 2 )(delayed (main )
185- ( sim , sim_data [simulation_name ], n , crit , n_dimensions , n_iter )
186- for sim , n , crit , n_iter in params )
189+ ( sim_name , sim_data [sim_name ][ n_iter ], n , crit , n_dimensions , n_iter )
190+ for sim_name , n , crit , n_iter in params )
187191
188192# Save results as a DataFrame
189193columns = ["simulation" , "n_samples" , "criterion" ,
@@ -198,4 +202,6 @@ def main(simulation_name, sim_data, n_samples, criterion, n_dimensions, n_iter):
198202 kind = "line" ,
199203 data = df ,
200204 facet_kws = {'sharey' : False , 'sharex' : True })
201- plt .show ()
205+ plt .tight_layout ()
206+ plt .savefig ("splitter_comparison_02_17.png" )
207+ plt .show ()
0 commit comments