Merge pull request #35 from wwu-mmll/develop

ksarink · web-flow · commit 42179506ff0b · 2021-03-04T15:36:38.000+01:00
Develop
diff --git a/README.md b/README.md
@@ -1,4 +1,4 @@
-![PHOTON LOGO](http://www.photon-ai.com/static/img/photon/photon-logo-github.png "PHOTON Logo")
+[![PHOTON LOGO](https://www.photon-ai.com/static/img/photon/photon-logo-github.png)](https://www.photon-ai.com/)
 
 [![GitHub Workflow Status](https://img.shields.io/github/workflow/status/wwu-mmll/photonai/PHOTONAI%20test%20and%20test%20deploy)](https://github.com/wwu-mmll/photonai/actions)
 [![Coverage Status](https://coveralls.io/repos/github/wwu-mmll/photonai/badge.svg?branch=master)](https://coveralls.io/github/wwu-mmll/photonai?branch=master)
diff --git a/examples/advanced/feature_importance.py b/examples/advanced/feature_importance.py
@@ -18,10 +18,10 @@
 my_pipe += PipelineElement('Ridge', alpha=1e-2)
 my_pipe.fit(X_train, y_train)
 
-r = my_pipe.get_permutation_feature_importances(X_val, y_val, n_repeats=50, random_state=0)
+r = my_pipe.get_permutation_feature_importances(n_repeats=50, random_state=0)
 
-for i in r.importances_mean.argsort()[::-1]:
-    if r.importances_mean[i] - 2 * r.importances_std[i] > 0:
+for i in r["mean"].argsort()[::-1]:
+    if r["mean"][i] - 2 * r["std"][i] > 0:
         print(f"{diabetes.feature_names[i]:<8}"
-              f"{r.importances_mean[i]:.3f}"
-              f" +/- {r.importances_std[i]:.3f}")
+              f"{r['mean'][i]:.3f}"
+              f" +/- {r['std'][i]:.3f}")
diff --git a/photonai/base/hyperpipe.py b/photonai/base/hyperpipe.py
@@ -29,6 +29,7 @@
     PhotonNative
 from photonai.base.photon_pipeline import PhotonPipeline
 from photonai.base.json_transformer import JsonTransformer
+from photonai.helper.helper import PhotonDataHelper
 from photonai.optimization import FloatRange
 from photonai.photonlogger.logger import logger
 from photonai.processing import ResultsHandler
@@ -1166,12 +1167,11 @@ def score(self, data: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
             scorer = Scorer.create(self.optimization.best_config_metric)
             return scorer(y, predictions)
 
-    def get_permutation_feature_importances(self, X_val: np.ndarray, y_val: np.ndarray, **kwargs):
+    def get_permutation_feature_importances(self, **kwargs):
         """
-        Since PHOTONAI is built on top of the scikit-learn interface,
-        it is possible to use direct functions from their package.
-        Here the example of the [feature importance via permutations](
-        https://scikit-learn.org/stable/modules/generated/sklearn.inspection.permutation_importance.html).
+        Fits a model for the best config of each outer fold (using the training data of that fold).
+        Then calls sklearn.inspection.permutation_importance with the test data and the given kwargs (e.g. n_repeats).
+        Returns "importances_mean" and "importances_std", each averaged over all outer folds.
 
         Parameters:
             X_val:
@@ -1187,11 +1187,50 @@ def get_permutation_feature_importances(self, X_val: np.ndarray, y_val: np.ndarr
                 Keyword arguments, passed to sklearn.permutation_importance.
 
         Returns:
-            Dictionary-like object, with the following attributes: importances_mean, importances_std, importances.
+            Dictionary with keys "mean" and "std": per-feature importances_mean and importances_std, each averaged over all outer folds.
 
         """
 
-        return permutation_importance(self.optimum_pipe, X_val, y_val, **kwargs)
+        importance_list = {'mean': list(), 'std': list()}
+        pipe_copy = self.optimum_pipe.copy_me()
+        logger.photon_system_log("")
+        logger.photon_system_log("Computing permutation importances. This may take a while.")
+        logger.stars()
+        for outer_fold in self.results.outer_folds:
+
+            if outer_fold.best_config.best_config_score is None:
+                raise ValueError("Cannot compute permutation importances when use_test_set is false")
+
+
+            # prepare data
+            train_indices = outer_fold.best_config.best_config_score.training.indices
+            test_indices = outer_fold.best_config.best_config_score.validation.indices
+
+            train_X, train_y, train_kwargs = PhotonDataHelper.split_data(self.data.X,
+                                                                         self.data.y,
+                                                                         self.data.kwargs,
+                                                                         indices=train_indices)
+
+            test_X, test_y, test_kwargs = PhotonDataHelper.split_data(self.data.X,
+                                                                      self.data.y,
+                                                                      self.data.kwargs,
+                                                                      indices=test_indices)
+            # set pipe to config
+            pipe_copy.set_params(**outer_fold.best_config.config_dict)
+            logger.photon_system_log("Permutation Importances: Fitting model for outer fold " + str(outer_fold.fold_nr))
+            pipe_copy.fit(train_X, train_y, **train_kwargs)
+
+            logger.photon_system_log("Permutation Importances: Calculating performances for outer fold "
+                                     + str(outer_fold.fold_nr))
+            outer_fold_perm_imps = permutation_importance(pipe_copy, test_X, test_y, **kwargs)
+            importance_list['mean'].append(outer_fold_perm_imps["importances_mean"])
+            importance_list['std'].append(outer_fold_perm_imps["importances_std"])
+
+        mean_importances = np.mean(np.array(importance_list["mean"]), axis=0)
+        std_importances = np.mean(np.array(importance_list["std"]), axis=0)
+        logger.stars()
+
+        return {'mean': mean_importances, 'std': std_importances}
 
     def inverse_transform_pipeline(self, hyperparameters: dict,
                                    data: np.ndarray,
diff --git a/test/base_tests/test_hyperpipe.py b/test/base_tests/test_hyperpipe.py
@@ -197,9 +197,16 @@ def test_permutation_feature_importances(self):
         score_element = svc.score(self.__X, self.__y)
         self.assertAlmostEqual(score_photon, score_element)
 
-        permutation_score = hp.get_permutation_feature_importances(self.__X, self.__y, n_repeats=50, random_state=0)
-        score_2 = permutation_importance(svc, self.__X, self.__y, n_repeats=50, random_state=0)
-        np.testing.assert_array_equal(permutation_score["importances"], score_2["importances"])
+        permutation_score = hp.get_permutation_feature_importances(n_repeats=5, random_state=0)
+        self.assertTrue("mean" in permutation_score)
+        self.assertTrue("std" in permutation_score)
+        self.assertEqual(permutation_score["mean"].shape, (self.__X.shape[1],))
+        self.assertEqual(permutation_score["std"].shape, (self.__X.shape[1],))
+
+        hp.cross_validation.use_test_set = False
+        hp.fit(self.__X, self.__y)
+        with self.assertRaises(ValueError):
+            hp.get_permutation_feature_importances(n_repeats=5)
 
     def test_estimation_type(self):
         def callback(X, y=None, **kwargs):

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-![PHOTON LOGO](http://www.photon-ai.com/static/img/photon/photon-logo-github.png "PHOTON Logo")`
	`1`	`+[![PHOTON LOGO](https://www.photon-ai.com/static/img/photon/photon-logo-github.png)](https://www.photon-ai.com/)`
`2`	`2`
`3`	`3`	`[![GitHub Workflow Status](https://img.shields.io/github/workflow/status/wwu-mmll/photonai/PHOTONAI%20test%20and%20test%20deploy)](https://github.com/wwu-mmll/photonai/actions)`
`4`	`4`	`[![Coverage Status](https://coveralls.io/repos/github/wwu-mmll/photonai/badge.svg?branch=master)](https://coveralls.io/github/wwu-mmll/photonai?branch=master)`