MAIF
diff --git a/docs/tutorials/tuto-common01-groups_of_features.rst b/docs/tutorials/tuto-common01-groups_of_features.rst
Lines changed: 138 additions & 0 deletions
diff --git a/shapash/explainer/smart_explainer.py b/shapash/explainer/smart_explainer.py
Lines changed: 35 additions & 12 deletions
diff --git a/shapash/explainer/smart_plotter.py b/shapash/explainer/smart_plotter.py
Lines changed: 2 additions & 1 deletion
diff --git a/shapash/explainer/smart_predictor.py b/shapash/explainer/smart_predictor.py
Lines changed: 68 additions & 18 deletions
@@ -259,7 +259,8 @@ def _compile_features_groups(self, features_groups):
         self.x_pred_groups = create_grouped_features_values(x_pred=self.x_pred, x_init=self.x_init,
                                                             preprocessing=self.preprocessing,
                                                             features_groups=self.features_groups,
-                                                            how='tsne')
+                                                            features_dict=self.features_dict,
+                                                            how='dict_of_values')
         # Compute data attribute for groups of features
         self.data_groups = self.state.assign_contributions(
             self.state.rank_contributions(
@@ -818,7 +819,8 @@ def to_pandas(
             threshold=None,
             positive=None,
             max_contrib=None,
-            proba=False
+            proba=False,
+            use_groups=None
     ):
         """
         The to_pandas method allows to export the summary of local explainability.
@@ -848,6 +850,9 @@ def to_pandas(
             Number of contributions to show in the pandas df
         proba : bool, optional (default: False)
             adding proba in output df
+        use_groups : bool (optional)
+            Whether or not to use groups of features contributions (only available if features_groups
+            parameter was not empty when calling compile method).
 
         Returns
         -------
@@ -864,6 +869,11 @@ def to_pandas(
         1	3	    0.628911	Sex	        2.0	        0.585475	    Pclass	    1.0	        0.370504
         2	0	    0.543308	Sex	        2.0	        -0.486667	    Pclass	    3.0	        0.255072
         """
+        use_groups = True if (use_groups is not False and self.features_groups is not None) else False
+        if use_groups:
+            data = self.data_groups
+        else:
+            data = self.data
 
         # Classification: y_pred is needed
         if self.y_pred is None:
@@ -873,21 +883,34 @@ def to_pandas(
 
         # Apply filter method if necessary
         if all(var is None for var in [features_to_hide, threshold, positive, max_contrib]) \
-                and hasattr(self, 'mask_params'):
+                and hasattr(self, 'mask_params') \
+                and (
+                # if the already computed mask does not have the right shape (this can happen when
+                # we use groups of features once and then use method without groups)
+                (isinstance(data['contrib_sorted'], pd.DataFrame)
+                    and len(data["contrib_sorted"].columns) == len(self.mask.columns))
+                or
+                (isinstance(data['contrib_sorted'], list)
+                    and len(data["contrib_sorted"][0].columns) == len(self.mask[0].columns))
+                ):
             print('to_pandas params: ' + str(self.mask_params))
         else:
             self.filter(features_to_hide=features_to_hide,
                         threshold=threshold,
                         positive=positive,
-                        max_contrib=max_contrib)
-
+                        max_contrib=max_contrib,
+                        display_groups=use_groups)
+        if use_groups:
+            columns_dict = {i: col for i, col in enumerate(self.x_pred_groups.columns)}
+        else:
+            columns_dict = self.columns_dict
         # Summarize information
-        self.data['summary'] = self.state.summarize(
-            self.data['contrib_sorted'],
-            self.data['var_dict'],
-            self.data['x_sorted'],
+        data['summary'] = self.state.summarize(
+            data['contrib_sorted'],
+            data['var_dict'],
+            data['x_sorted'],
             self.mask,
-            self.columns_dict,
+            columns_dict,
             self.features_dict
         )
         # Matching with y_pred
@@ -897,7 +920,7 @@ def to_pandas(
         else:
             proba_values = None
 
-        y_pred, summary = keep_right_contributions(self.y_pred, self.data['summary'],
+        y_pred, summary = keep_right_contributions(self.y_pred, data['summary'],
                                                    self._case, self._classes,
                                                    self.label_dict, proba_values)
 
@@ -1025,7 +1048,7 @@ def to_smartpredictor(self):
         self.features_types = {features: str(self.x_pred[features].dtypes) for features in self.x_pred.columns}
 
         listattributes = ["features_dict", "model", "columns_dict", "explainer", "features_types",
-                          "label_dict", "preprocessing", "postprocessing"]
+                          "label_dict", "preprocessing", "postprocessing", "features_groups"]
 
         params_smartpredictor = [self.check_attributes(attribute) for attribute in listattributes]
 
 
@@ -1240,7 +1240,8 @@ def contribution_plot(self,
 
         if col_is_group:
             feature_values = project_feature_values_1d(feature_values, col, self.explainer.x_pred,
-                                                       self.explainer.x_init, self.explainer.preprocessing)
+                                                       self.explainer.x_init, self.explainer.preprocessing,
+                                                       features_dict=self.explainer.features_dict)
             contrib = subcontrib.loc[list_ind, col].to_frame()
             if self.explainer.features_imp is None:
                 self.explainer.compute_features_import()
 
@@ -21,7 +21,7 @@
 from shapash.manipulation.filters import combine_masks
 from shapash.manipulation.mask import init_mask
 from shapash.manipulation.mask import compute_masked_contributions
-from shapash.manipulation.summarize import summarize
+from shapash.manipulation.summarize import summarize, create_grouped_features_values, group_contributions
 from shapash.decomposition.contributions import rank_contributions, assign_contributions
 from shapash.utils.columntransformer_backend import columntransformer
 import copy
@@ -100,6 +100,7 @@ def __init__(self, features_dict, model,
                  columns_dict, explainer, features_types,
                  label_dict=None, preprocessing=None,
                  postprocessing=None,
+                 features_groups=None,
                  mask_params = {"features_to_hide": None,
                                 "threshold": None,
                                 "positive": None,
@@ -130,10 +131,11 @@ def __init__(self, features_dict, model,
         self.mask_params = mask_params
         self.check_mask_params()
         self.postprocessing = postprocessing
+        self.features_groups = features_groups
         list_preprocessing = preprocessing_tolist(self.preprocessing)
         check_consistency_model_features(self.features_dict, self.model, self.columns_dict,
                                          self.features_types, self.mask_params, self.preprocessing,
-                                         self.postprocessing, list_preprocessing)
+                                         self.postprocessing, list_preprocessing, self.features_groups)
         check_consistency_model_label(self.columns_dict, self.label_dict)
         self._drop_option = check_preprocessing_options(columns_dict, features_dict, preprocessing, list_preprocessing)
 
@@ -217,9 +219,34 @@ def add_input(self, x=None, ypred=None, contributions=None):
             self.data["ypred_init"] = self.check_ypred(ypred)
 
         if contributions is not None:
-            self.data["ypred"], self.data["contributions"] = self.compute_contributions(contributions=contributions)
+            self.data["ypred"], self.data["contributions"] = self.compute_contributions(
+                contributions=contributions,
+                use_groups=False
+            )
         else:
-            self.data["ypred"], self.data["contributions"]  = self.compute_contributions()
+            self.data["ypred"], self.data["contributions"]  = self.compute_contributions(use_groups=False)
+
+        if self.features_groups is not None:
+            self._add_groups_input()
+
+    def _add_groups_input(self):
+        """
+        Populate the data_groups attribute, the groups-of-features counterpart of the data dict filled by
+        add_input: grouped feature values ('x_postprocessed'), the same 'ypred' and grouped 'contributions'.
+        """
+        self.data_groups = dict()  # rebuilt from scratch on every call
+        self.data_groups['x_postprocessed'] = create_grouped_features_values(x_pred=self.data["x_postprocessed"],
+                                                                             x_init=self.data["x_preprocessed"],
+                                                                             preprocessing=self.preprocessing,
+                                                                             features_groups=self.features_groups,
+                                                                             features_dict=self.features_dict,
+                                                                             how='dict_of_values')
+        self.data_groups['ypred'] = self.data["ypred"]  # same object as in self.data, not a copy
+        self.data_groups['contributions'] = group_contributions(
+            contributions=self.data['contributions'],
+            features_groups=self.features_groups
+        )
+
 
     def check_dataset_type(self, x=None):
         """
@@ -431,7 +458,7 @@ def predict_proba(self):
         """
         return predict_proba(self.model, self.data["x_preprocessed"], self._classes)
 
-    def compute_contributions(self, contributions=None):
+    def compute_contributions(self, contributions=None, use_groups=None):
         """
         The compute_contributions compute the contributions associated to data ypred specified.
         Need a data ypred specified in an add_input to display detail_contributions.
@@ -440,6 +467,8 @@ def compute_contributions(self, contributions=None):
         -------
         contributions : object (optional)
             Local contributions, or list of local contributions.
+        use_groups : bool (optional)
+            Whether or not to compute groups of features contributions.
 
         Returns
         -------
@@ -449,6 +478,8 @@ def compute_contributions(self, contributions=None):
             ypred data with right probabilities associated.
 
         """
+        use_groups = True if (use_groups is not False and self.features_groups is not None) else False
+
         if not hasattr(self, "data"):
             raise ValueError("add_input method must be called at least once.")
         if self.data["x"] is None:
@@ -475,9 +506,12 @@ def compute_contributions(self, contributions=None):
         y_pred, match_contrib = keep_right_contributions(self.data["ypred_init"], contributions,
                                  self._case, self._classes,
                                  self.label_dict, proba_values)
+        if use_groups:
+            match_contrib = group_contributions(match_contrib, features_groups=self.features_groups)
+
         return y_pred, match_contrib
 
-    def detail_contributions(self, contributions=None):
+    def detail_contributions(self, contributions=None, use_groups=None):
         """
         The detail_contributions method associates the right contributions with the right data predicted.
         (with ypred specified in add_input or computed automatically)
@@ -486,6 +520,8 @@ def detail_contributions(self, contributions=None):
         -------
         contributions : object (optional)
             Local contributions, or list of local contributions.
+        use_groups : bool (optional)
+            Whether or not to compute groups of features contributions.
 
         Returns
         -------
@@ -499,7 +535,7 @@ def detail_contributions(self, contributions=None):
         >>> predictor.detail_contributions()
 
         """
-        y_pred, detail_contrib = self.compute_contributions(contributions=contributions)
+        y_pred, detail_contrib = self.compute_contributions(contributions=contributions, use_groups=use_groups)
         return pd.concat([y_pred, detail_contrib], axis=1)
 
     def apply_preprocessing_for_contributions(self, contributions, preprocessing=None):
@@ -593,7 +629,7 @@ def filter(self):
             self.mask
         )
 
-    def summarize(self):
+    def summarize(self, use_groups=None):
         """
         The summarize method allows to display the summary of local explainability.
         This method can be configured with modify_mask method to summarize the explainability to suit needs.
@@ -606,6 +642,11 @@ def summarize(self):
             - the right probabilities from predict_proba associated to the right predicted values
             - the right contributions ranked and filtered as specify with modify_mask method
 
+        Parameters
+        ----------
+        use_groups : bool (optional)
+            Whether or not to compute groups of features contributions.
+
         Returns
         -------
         pandas.DataFrame
@@ -629,39 +670,47 @@ def summarize(self):
         2	0	    0.543308	Sex	        2.0	        -0.486667
         """
         # data is needed : add_input() method must be called at least once
+        use_groups = True if (use_groups is not False and self.features_groups is not None) else False
 
         if not hasattr(self, "data"):
             raise ValueError("You have to specify dataset x and y_pred arguments. Please use add_input() method.")
 
+        if use_groups is True:
+            data = self.data_groups
+        else:
+            data = self.data
+
         if self._drop_option is not None:
-            x_preprocessed = self.data["x_postprocessed"][self._drop_option["columns_dict_op"].values()]
-            columns_dict =self._drop_option["columns_dict_op"]
-            features_dict = self._drop_option["features_dict_op"]
+            columns_to_keep = [x for x in self._drop_option["columns_dict_op"].values()
+                               if x in data["x_postprocessed"].columns]
+            if use_groups:
+                columns_to_keep += list(self.features_groups.keys())
+            x_preprocessed = data["x_postprocessed"][columns_to_keep]
         else:
-            x_preprocessed = self.data["x_postprocessed"]
-            columns_dict = self.columns_dict
-            features_dict = self.features_dict
+            x_preprocessed = data["x_postprocessed"]
 
+        columns_dict = {i: col for i, col in enumerate(x_preprocessed.columns)}
+        features_dict = {k: v for k, v in self.features_dict.items() if k in x_preprocessed.columns}
 
         self.summary = assign_contributions(
             rank_contributions(
-                self.data["contributions"],
+                data["contributions"],
                 x_preprocessed
             )
         )
         # Apply filter method with mask_params attributes parameters
         self.filter()
 
         # Summarize information
-        self.data['summary'] = summarize(self.summary['contrib_sorted'],
+        data['summary'] = summarize(self.summary['contrib_sorted'],
                                          self.summary['var_dict'],
                                          self.summary['x_sorted'],
                                          self.mask,
                                          columns_dict,
                                          features_dict)
 
         # Matching with y_pred
-        return pd.concat([self.data["ypred"], self.data['summary']], axis=1)
+        return pd.concat([data["ypred"], data['summary']], axis=1)
 
     def modify_mask(
             self,
@@ -804,5 +853,6 @@ def to_smartexplainer(self):
                     explainer=self.explainer,
                     y_pred=copy.deepcopy(self.data["ypred_init"]),
                     preprocessing=self.preprocessing,
-                    postprocessing=self.postprocessing)
+                    postprocessing=self.postprocessing,
+                    features_groups=self.features_groups)
         return xpl