@@ -1167,6 +1167,79 @@ def score(self, data: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
11671167 scorer = Scorer .create (self .optimization .best_config_metric )
11681168 return scorer (y , predictions )
11691169
def _calculate_permutation_importances(self, **kwargs):
    """
    Refit the best config of every outer fold and collect permutation importances.

    Extracted from get_permutation_feature_importances to improve unit testing.

    For each outer fold, a copy of ``self.optimum_pipe`` is set to that fold's
    best config. Depending on ``self.results.hyperpipe_info.eval_final_performance``:

    * enabled: the pipeline is fitted once on the outer fold's training indices
      and permutation importances are computed on its validation indices.
    * disabled: the pipeline is fitted once per inner fold of the best config.
      The inner fold indices are applied to the outer fold's training data, or
      to the complete data set when no outer fold training indices are stored.

    Parameters
    ----------
    **kwargs:
        Forwarded unchanged to ``permutation_importance``.

    Returns
    -------
    dict
        ``{'mean': [...], 'std': [...]}`` with one ``importances_mean`` /
        ``importances_std`` entry per fitted fold.

    Raises
    ------
    ValueError
        If an outer fold has no best config, or if the final performance was
        evaluated but the outer fold carries no best config score from which
        train/test indices could be read.
    """

    importance_list = {'mean': [], 'std': []}

    def train_and_get_fimps(pipeline, train_idx, test_idx, data_X, data_y, data_kwargs, fold_str):
        """Fit ``pipeline`` on the train split, score permutation importances on the test split."""
        train_X, train_y, train_kwargs = PhotonDataHelper.split_data(data_X, data_y, data_kwargs,
                                                                     indices=train_idx)

        # the kwargs of the test split are not handed to permutation_importance
        test_X, test_y, _ = PhotonDataHelper.split_data(data_X, data_y, data_kwargs,
                                                        indices=test_idx)

        # fit fold's best model (again) -> to obtain that model's feature importances
        logger.photon_system_log("Permutation Importances: Fitting model for " + fold_str)
        pipeline.fit(train_X, train_y, **train_kwargs)

        # get feature importances
        logger.photon_system_log("Permutation Importances: Calculating performances for " + fold_str)
        perm_imps = permutation_importance(pipeline, test_X, test_y, **kwargs)

        # store into the list shared with the enclosing method
        importance_list['mean'].append(perm_imps["importances_mean"])
        importance_list['std'].append(perm_imps["importances_std"])

        return perm_imps

    for outer_fold in self.results.outer_folds:

        best_config = outer_fold.best_config
        if best_config is None:
            raise ValueError("Could not find a best config for outer fold " + str(outer_fold.fold_nr))

        pipe_copy = self.optimum_pipe.copy_me()

        # set pipe to config
        pipe_copy.set_params(**best_config.config_dict)

        outer_score = best_config.best_config_score

        if not self.results.hyperpipe_info.eval_final_performance:
            # Short-circuit evaluation: `.training` is only accessed when a score
            # object exists. Checking it in a separate `if` would raise an
            # AttributeError whenever best_config_score is None.
            no_outer_cv_indices = (outer_score is None
                                   or outer_score.training is None
                                   or not outer_score.training.indices)

            if no_outer_cv_indices:
                data_to_split, y_to_split, kwargs_to_split = self.data.X, self.data.y, self.data.kwargs
            else:
                logger.photon_system_log("Permutation Importances: Using inner_cv folds.")

                # get outer fold data
                idx = outer_score.training.indices
                data_to_split, y_to_split, kwargs_to_split = PhotonDataHelper.split_data(self.data.X,
                                                                                         self.data.y,
                                                                                         self.data.kwargs,
                                                                                         indices=idx)

            for inner_fold in best_config.inner_folds:
                train_and_get_fimps(pipe_copy,
                                    inner_fold.training.indices, inner_fold.validation.indices,
                                    data_to_split, y_to_split, kwargs_to_split,
                                    "inner fold " + str(inner_fold.fold_nr))

        else:
            if outer_score is None:
                # fail with a clear message instead of an AttributeError on None
                raise ValueError("Cannot compute permutation importances for outer fold "
                                 + str(outer_fold.fold_nr)
                                 + ": no best config score with train/test indices available")

            train_and_get_fimps(pipe_copy,
                                outer_score.training.indices,
                                outer_score.validation.indices,
                                self.data.X, self.data.y, self.data.kwargs,
                                "outer fold " + str(outer_fold.fold_nr))

    return importance_list
1242+
11701243 def get_permutation_feature_importances (self , ** kwargs ):
11711244 """
11721245 Fits a model for the best config of each outer fold (using the training data of that fold).
@@ -1191,41 +1264,13 @@ def get_permutation_feature_importances(self, **kwargs):
11911264
11921265 """
11931266
1194- importance_list = {'mean' : list (), 'std' : list ()}
1195- pipe_copy = self .optimum_pipe .copy_me ()
11961267 logger .photon_system_log ("" )
11971268 logger .photon_system_log ("Computing permutation importances. This may take a while." )
11981269 logger .stars ()
1199- for outer_fold in self .results .outer_folds :
1200-
1201- if outer_fold .best_config .best_config_score is None :
1202- raise ValueError ("Cannot compute permutation importances when use_test_set is false" )
1203-
1204-
1205- # prepare data
1206- train_indices = outer_fold .best_config .best_config_score .training .indices
1207- test_indices = outer_fold .best_config .best_config_score .validation .indices
1208-
1209- train_X , train_y , train_kwargs = PhotonDataHelper .split_data (self .data .X ,
1210- self .data .y ,
1211- self .data .kwargs ,
1212- indices = train_indices )
1213-
1214- test_X , test_y , test_kwargs = PhotonDataHelper .split_data (self .data .X ,
1215- self .data .y ,
1216- self .data .kwargs ,
1217- indices = test_indices )
1218- # set pipe to config
1219- pipe_copy .set_params (** outer_fold .best_config .config_dict )
1220- logger .photon_system_log ("Permutation Importances: Fitting model for outer fold " + str (outer_fold .fold_nr ))
1221- pipe_copy .fit (train_X , train_y , ** train_kwargs )
1222-
1223- logger .photon_system_log ("Permutation Importances: Calculating performances for outer fold "
1224- + str (outer_fold .fold_nr ))
1225- outer_fold_perm_imps = permutation_importance (pipe_copy , test_X , test_y , ** kwargs )
1226- importance_list ['mean' ].append (outer_fold_perm_imps ["importances_mean" ])
1227- importance_list ['std' ].append (outer_fold_perm_imps ["importances_std" ])
1228-
1270+ if self .optimum_pipe is None :
1271+ raise ValueError ("Cannot calculate permutation importances when optimum_pipe is None (probably the "
1272+ "training and optimization procedure failed)" )
1273+ importance_list = self ._calculate_permutation_importances (** kwargs )
12291274 mean_importances = np .mean (np .array (importance_list ["mean" ]), axis = 0 )
12301275 std_importances = np .mean (np .array (importance_list ["std" ]), axis = 0 )
12311276 logger .stars ()
0 commit comments