Skip to content

Commit dd8501e

Browse files
authored
Merge pull request #36 from wwu-mmll/develop
Develop
2 parents 4217950 + 0355dd1 commit dd8501e

File tree

6 files changed

+136
-47
lines changed

6 files changed

+136
-47
lines changed

examples/heart_failure/heart_failure.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,16 @@
44
from photonai.optimization import FloatRange, IntegerRange
55

66
# setup training and test workflow
7-
my_pipe = Hyperpipe('heart_failure_lasso',
8-
outer_cv=ShuffleSplit(n_splits=100, test_size=0.2),
7+
my_pipe = Hyperpipe('heart_failure',
8+
outer_cv=ShuffleSplit(n_splits=10, test_size=0.2),
99
inner_cv=KFold(n_splits=10, shuffle=True),
1010
use_test_set=False,
1111
metrics=['balanced_accuracy', 'f1_score', 'matthews_corrcoef',
1212
'sensitivity', 'specificity'],
1313
best_config_metric='f1_score',
1414
optimizer='switch',
1515
optimizer_params={'name': 'sk_opt', 'n_configurations': 10},
16-
project_folder='./tmpv2',
16+
project_folder='./tmp',
1717
cache_folder='./cache',
1818
verbosity=0)
1919

photonai/base/hyperpipe.py

Lines changed: 77 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1167,6 +1167,79 @@ def score(self, data: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
11671167
scorer = Scorer.create(self.optimization.best_config_metric)
11681168
return scorer(y, predictions)
11691169

1170+
def _calculate_permutation_importances(self, **kwargs):
1171+
"""
1172+
extracted function from get_feature_importance to improve unit testing
1173+
"""
1174+
1175+
importance_list = {'mean': list(), 'std': list()}
1176+
1177+
def train_and_get_fimps(pipeline, train_idx, test_idx, data_X, data_y, data_kwargs, fold_str):
1178+
1179+
train_X, train_y, train_kwargs = PhotonDataHelper.split_data(data_X, data_y, data_kwargs,
1180+
indices=train_idx)
1181+
1182+
test_X, test_y, test_kwargs = PhotonDataHelper.split_data(data_X, data_y, data_kwargs,
1183+
indices=test_idx)
1184+
1185+
# fit fold's best model (again) -> to obtain that model's feature importances
1186+
logger.photon_system_log("Permutation Importances: Fitting model for " + fold_str)
1187+
pipeline.fit(train_X, train_y, **train_kwargs)
1188+
1189+
# get feature importances
1190+
logger.photon_system_log("Permutation Importances: Calculating performances for " + fold_str)
1191+
perm_imps = permutation_importance(pipeline, test_X, test_y, **kwargs)
1192+
1193+
# store into list
1194+
importance_list['mean'].append(perm_imps["importances_mean"])
1195+
importance_list['std'].append(perm_imps["importances_std"])
1196+
1197+
return perm_imps
1198+
1199+
for outer_fold in self.results.outer_folds:
1200+
1201+
if outer_fold.best_config is None:
1202+
raise ValueError("Could not find a best config for outer fold " + str(outer_fold.fold_nr))
1203+
1204+
pipe_copy = self.optimum_pipe.copy_me()
1205+
1206+
# set pipe to config
1207+
pipe_copy.set_params(**outer_fold.best_config.config_dict)
1208+
1209+
if not self.results.hyperpipe_info.eval_final_performance:
1210+
no_outer_cv_indices = False
1211+
if outer_fold.best_config.best_config_score is None:
1212+
no_outer_cv_indices = True
1213+
if outer_fold.best_config.best_config_score.training is None or not outer_fold.best_config.best_config_score.training.indices:
1214+
no_outer_cv_indices = True
1215+
1216+
if no_outer_cv_indices:
1217+
data_to_split, y_to_split, kwargs_to_split = self.data.X, self.data.y, self.data.kwargs
1218+
else:
1219+
1220+
logger.photon_system_log("Permutation Importances: Using inner_cv folds.")
1221+
1222+
# get outer fold data
1223+
idx = outer_fold.best_config.best_config_score.training.indices
1224+
data_to_split, y_to_split, kwargs_to_split = PhotonDataHelper.split_data(self.data.X,
1225+
self.data.y,
1226+
self.data.kwargs,
1227+
indices=idx)
1228+
1229+
for inner_fold in outer_fold.best_config.inner_folds:
1230+
train_and_get_fimps(pipe_copy,
1231+
inner_fold.training.indices, inner_fold.validation.indices,
1232+
data_to_split, y_to_split, kwargs_to_split,
1233+
"inner fold " + str(inner_fold.fold_nr))
1234+
1235+
else:
1236+
train_and_get_fimps(pipe_copy,
1237+
outer_fold.best_config.best_config_score.training.indices,
1238+
outer_fold.best_config.best_config_score.validation.indices,
1239+
self.data.X, self.data.y, self.data.kwargs, "outer fold " + str(outer_fold.fold_nr))
1240+
1241+
return importance_list
1242+
11701243
def get_permutation_feature_importances(self, **kwargs):
11711244
"""
11721245
Fits a model for the best config of each outer fold (using the training data of that fold).
@@ -1191,41 +1264,13 @@ def get_permutation_feature_importances(self, **kwargs):
11911264
11921265
"""
11931266

1194-
importance_list = {'mean': list(), 'std': list()}
1195-
pipe_copy = self.optimum_pipe.copy_me()
11961267
logger.photon_system_log("")
11971268
logger.photon_system_log("Computing permutation importances. This may take a while.")
11981269
logger.stars()
1199-
for outer_fold in self.results.outer_folds:
1200-
1201-
if outer_fold.best_config.best_config_score is None:
1202-
raise ValueError("Cannot compute permutation importances when use_test_set is false")
1203-
1204-
1205-
# prepare data
1206-
train_indices = outer_fold.best_config.best_config_score.training.indices
1207-
test_indices = outer_fold.best_config.best_config_score.validation.indices
1208-
1209-
train_X, train_y, train_kwargs = PhotonDataHelper.split_data(self.data.X,
1210-
self.data.y,
1211-
self.data.kwargs,
1212-
indices=train_indices)
1213-
1214-
test_X, test_y, test_kwargs = PhotonDataHelper.split_data(self.data.X,
1215-
self.data.y,
1216-
self.data.kwargs,
1217-
indices=test_indices)
1218-
# set pipe to config
1219-
pipe_copy.set_params(**outer_fold.best_config.config_dict)
1220-
logger.photon_system_log("Permutation Importances: Fitting model for outer fold " + str(outer_fold.fold_nr))
1221-
pipe_copy.fit(train_X, train_y, **train_kwargs)
1222-
1223-
logger.photon_system_log("Permutation Importances: Calculating performances for outer fold "
1224-
+ str(outer_fold.fold_nr))
1225-
outer_fold_perm_imps = permutation_importance(pipe_copy, test_X, test_y, **kwargs)
1226-
importance_list['mean'].append(outer_fold_perm_imps["importances_mean"])
1227-
importance_list['std'].append(outer_fold_perm_imps["importances_std"])
1228-
1270+
if self.optimum_pipe is None:
1271+
raise ValueError("Cannot calculate permutation importances when optimum_pipe is None (probably the "
1272+
"training and optimization procedure failed)")
1273+
importance_list = self._calculate_permutation_importances(**kwargs)
12291274
mean_importances = np.mean(np.array(importance_list["mean"]), axis=0)
12301275
std_importances = np.mean(np.array(importance_list["std"]), axis=0)
12311276
logger.stars()

photonai/processing/results_handler.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -751,9 +751,9 @@ def write_convenience_files(self):
751751
self.write_predictions_file()
752752

753753
def convert_to_json_serializable(self, value):
754-
if isinstance(value, (np.int, np.int32, np.int64)):
754+
if isinstance(value, (int, np.int32, np.int64)):
755755
return int(value)
756-
if isinstance(value, (np.float, np.float32, np.float64)):
756+
if isinstance(value, (float, np.float32, np.float64)):
757757
if self.output_settings.reduce_space:
758758
return round(float(value), 3)
759759
return float(value)

test/base_tests/test_hyperpipe.py

Lines changed: 51 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,8 @@ def test_preprocessing(self):
184184

185185
def test_permutation_feature_importances(self):
186186
hp = Hyperpipe('god',
187-
inner_cv=self.inner_cv_object,
187+
outer_cv=KFold(n_splits=3),
188+
inner_cv=KFold(n_splits=2),
188189
metrics=self.metrics,
189190
best_config_metric=self.best_config_metric,
190191
project_folder=self.tmp_folder_path,
@@ -197,16 +198,60 @@ def test_permutation_feature_importances(self):
197198
score_element = svc.score(self.__X, self.__y)
198199
self.assertAlmostEqual(score_photon, score_element)
199200

200-
permutation_score = hp.get_permutation_feature_importances(n_repeats=5, random_state=0)
201-
self.assertTrue("mean" in permutation_score)
202-
self.assertTrue("std" in permutation_score)
203-
self.assertEqual(permutation_score["mean"].shape, (self.__X.shape[1],))
204-
self.assertEqual(permutation_score["std"].shape, (self.__X.shape[1],))
201+
# do it on outer folds
202+
permutation_list_outer = hp._calculate_permutation_importances(n_repeats=5, random_state=0)
203+
self.assertEqual(len(permutation_list_outer["mean"]), 3)
205204

205+
permutation_score_outer = hp.get_permutation_feature_importances(n_repeats=5, random_state=0)
206+
self.assertTrue("mean" in permutation_score_outer)
207+
self.assertTrue("std" in permutation_score_outer)
208+
self.assertEqual(permutation_score_outer["mean"].shape, (self.__X.shape[1],))
209+
self.assertEqual(permutation_score_outer["std"].shape, (self.__X.shape[1],))
210+
211+
# do it on inner folds but on training sets from outer split
212+
hp.cross_validation.use_test_set = False
213+
hp.fit(self.__X, self.__y)
214+
permutation_list_inner = hp._calculate_permutation_importances(n_repeats=5)
215+
self.assertEqual(len(permutation_list_inner["mean"]), 3*2)
216+
permutation_score_inner = hp.get_permutation_feature_importances(n_repeats=5)
217+
self.assertEqual(permutation_score_inner["mean"].shape, (self.__X.shape[1],))
218+
self.assertEqual(permutation_score_inner["std"].shape, (self.__X.shape[1],))
219+
# check that validation set permutation importances (inner folds) differ from those of test set (outer folds)
220+
self.assertFalse(np.array_equal(permutation_score_outer["mean"], permutation_score_inner["mean"]))
221+
222+
# do it on inner folds only
223+
hp.cross_validation.outer_folds = {}
224+
hp.cross_validation.outer_cv = None
206225
hp.cross_validation.use_test_set = False
207226
hp.fit(self.__X, self.__y)
227+
permutation_list_no_outer = hp._calculate_permutation_importances(n_repeats=5)
228+
self.assertEqual(len(permutation_list_no_outer), 2)
229+
permutation_score_no_outer = hp.get_permutation_feature_importances(n_repeats=5)
230+
self.assertEqual(permutation_score_inner["mean"].shape, (self.__X.shape[1],))
231+
self.assertEqual(permutation_score_inner["std"].shape, (self.__X.shape[1],))
232+
233+
# raise error
234+
def fake_metric(y_true, y_pred):
235+
return 'a'
236+
237+
hp = Hyperpipe('god',
238+
outer_cv=KFold(n_splits=3),
239+
inner_cv=KFold(n_splits=2),
240+
metrics=[('fake_metric', fake_metric)],
241+
best_config_metric='fake_metric',
242+
project_folder=self.tmp_folder_path,
243+
verbosity=0)
244+
svc = PipelineElement('SVC')
245+
hp += svc
246+
try:
247+
hp.fit(self.__X, self.__y)
248+
except Exception as e:
249+
# should produce an error so that hp.results.best_config is None.
250+
pass
208251
with self.assertRaises(ValueError):
209252
hp.get_permutation_feature_importances(n_repeats=5)
253+
with self.assertRaises(ValueError):
254+
hp._calculate_permutation_importances(n_repeats=5)
210255

211256
def test_estimation_type(self):
212257
def callback(X, y=None, **kwargs):

test/integration_tests/test_architecture.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,8 @@ def setUpClass(cls) -> None:
2525
if cls.test_multiple_hyperpipes:
2626
optimizer_list = ['random_grid_search', 'sk_opt']
2727
eval_final_performance_list = [True, False]
28-
inner_cv_list = [KFold(n_splits=3, shuffle=True), ShuffleSplit(n_splits=1, test_size=.2), LeaveOneOut()]
29-
outer_cv_list = [None, KFold(n_splits=3, shuffle=True), ShuffleSplit(n_splits=1, test_size=.25),
30-
LeaveOneOut()]
28+
inner_cv_list = [KFold(n_splits=3, shuffle=True), ShuffleSplit(n_splits=1, test_size=.2)]
29+
outer_cv_list = [None, KFold(n_splits=3, shuffle=True), ShuffleSplit(n_splits=1, test_size=.25)]
3130
performance_constraints_list = [None]
3231

3332
combinations = list(product(optimizer_list, eval_final_performance_list, inner_cv_list, outer_cv_list,

test/optimization_tests/sk_opt_tests/test_sk_opt.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def setUp(self):
1515
PipelineElement("SVC", hyperparameters={'C': FloatRange(1, 100)})]
1616
self.optimizer = SkOptOptimizer()
1717
self.optimizer_name = "sk_opt"
18-
self.optimizer_params = None
18+
self.optimizer_params = {'n_configurations': 10}
1919

2020
def test_ask_advanced(self):
2121
with self.assertRaises(ValueError):

0 commit comments

Comments
 (0)