Merge pull request #50 from wamartin-aml/wamartin-aml/pdpEbmContracts

interpret-ml · web-flow · commit 30e18a43216c · 2019-08-20T17:13:03.000-07:00
Add PDP and EBM contracts (an "mli" entry in each explanation's internal object)
diff --git a/python/interpret/blackbox/partialdependence.py b/python/interpret/blackbox/partialdependence.py
@@ -79,7 +79,7 @@ def _gen_pdp(
             X_mut[:, col_idx] = grid_point
             ice_lines[:, idx] = predict_fn(X_mut)
         mean = np.mean(ice_lines, axis=0)
-        # std = np.std(ice_lines, axis=0)
+        std = np.std(ice_lines, axis=0)
 
         ice_lines = ice_lines[
             np.random.choice(ice_lines.shape[0], num_ice_samples, replace=False), :
@@ -92,8 +92,8 @@ def _gen_pdp(
             "values": X[:, col_idx],
             "density": {"names": values, "scores": counts},
             # NOTE: We can take either bounds or background values, picked one.
-            # 'upper_bounds': mean + std * std_coef,
-            # 'lower_bounds': mean - std * std_coef,
+            "upper_bounds": mean + std * std_coef,
+            "lower_bounds": mean - std * std_coef,
             "background_scores": ice_lines,
         }
 
@@ -102,6 +102,8 @@ def explain_global(self, name=None):
             name = gen_name_from_class(self)
 
         data_dicts = []
+        feature_list = []
+        density_list = []
         for col_idx, feature in enumerate(self.feature_names):
             feature_type = self.feature_types[col_idx]
             pdp = PartialDependence._gen_pdp(
@@ -112,9 +114,30 @@ def explain_global(self, name=None):
                 num_points=self.num_points,
                 std_coef=self.std_coef,
             )
+            feature_dict = {
+                "feature_values": pdp["values"],
+                "scores": pdp["scores"],
+                "upper_bounds": pdp["upper_bounds"],
+                "lower_bounds": pdp["lower_bounds"]
+            }
+            feature_list.append(feature_dict)
+            density_list.append(pdp["density"])
             data_dicts.append(pdp)
 
-        internal_obj = {"overall": None, "specific": data_dicts}
+        internal_obj = {"overall": None, "specific": data_dicts, "mli": [
+            {
+                "explanation_type": "pdp",
+                "value": {
+                    "feature_list": feature_list
+                }
+            },
+            {
+                "explanation_type": "density",
+                "value": {
+                    "density": density_list
+                }
+            }
+        ]}
 
         selector = gen_global_selector(
             self.data, self.feature_names, self.feature_types, None
diff --git a/python/interpret/glassbox/ebm/ebm.py b/python/interpret/glassbox/ebm/ebm.py
@@ -1026,6 +1026,8 @@ def explain_global(self, name=None):
 
         # Add per feature graph
         data_dicts = []
+        feature_list = []
+        density_list = []
         for attribute_set_index, attribute_set in enumerate(self.attribute_sets_):
             model_graph = self.attribute_set_models_[attribute_set_index]
 
@@ -1038,21 +1040,37 @@ def explain_global(self, name=None):
                 # bin_counts = self.preprocessor_.get_bin_counts(
                 #     attribute_indexes[0]
                 # )
+                scores = list(model_graph)
+                upper_bounds = list(model_graph + errors)
+                lower_bounds = list(model_graph - errors)
+                density_dict = {
+                    "names": self.preprocessor_.get_hist_edges(
+                        attribute_indexes[0]
+                    ),
+                    "scores": self.preprocessor_.get_hist_counts(
+                        attribute_indexes[0]
+                    ),
+                }
+
+                feature_dict = {
+                    "type": "univariate",
+                    "names": bin_labels,
+                    "scores": scores,
+                    "scores_range": bounds,
+                    "upper_bounds": upper_bounds,
+                    "lower_bounds": lower_bounds,
+                }
+                feature_list.append(feature_dict)
+                density_list.append(density_dict)
+
                 data_dict = {
                     "type": "univariate",
                     "names": bin_labels,
-                    "scores": list(model_graph),
+                    "scores": scores,
                     "scores_range": bounds,
-                    "upper_bounds": list(model_graph + errors),
-                    "lower_bounds": list(model_graph - errors),
-                    "density": {
-                        "names": self.preprocessor_.get_hist_edges(
-                            attribute_indexes[0]
-                        ),
-                        "scores": self.preprocessor_.get_hist_counts(
-                            attribute_indexes[0]
-                        ),
-                    },
+                    "upper_bounds": upper_bounds,
+                    "lower_bounds": lower_bounds,
+                    "density": density_dict,
                 }
                 data_dicts.append(data_dict)
             elif len(attribute_indexes) == 2:
@@ -1062,6 +1080,17 @@ def explain_global(self, name=None):
                 bin_labels_right = self.preprocessor_.get_bin_labels(
                     attribute_indexes[1]
                 )
+
+                feature_dict = {
+                    "type": "pairwise",
+                    "left_names": bin_labels_left,
+                    "right_names": bin_labels_right,
+                    "scores": model_graph,
+                    "scores_range": bounds,
+                }
+                feature_list.append(feature_dict)
+                density_list.append({})
+
                 data_dict = {
                     "type": "pairwise",
                     "left_names": bin_labels_left,
@@ -1078,7 +1107,20 @@ def explain_global(self, name=None):
             "names": self.feature_names,
             "scores": self.mean_abs_scores_,
         }
-        internal_obj = {"overall": overall_dict, "specific": data_dicts}
+        internal_obj = {"overall": overall_dict, "specific": data_dicts, "mli": [
+            {
+                "explanation_type": "ebm_global",
+                "value": {
+                    "feature_list": feature_list
+                }
+            },
+            {
+                "explanation_type": "density",
+                "value": {
+                    "density": density_list
+                }
+            }
+        ]}
 
         return EBMExplanation(
             "global",
@@ -1134,12 +1176,31 @@ def explain_local(self, X, y=None, name=None):
         else:
             scores = EBMUtils.regressor_predict(instances, self)
 
+        perf_list = []
         for row_idx in range(n_rows):
-            data_dicts[row_idx]["perf"] = perf_dict(y, scores, row_idx)
+            perf = perf_dict(y, scores, row_idx)
+            perf_list.append(perf)
+            data_dicts[row_idx]["perf"] = perf
 
         selector = gen_local_selector(instances, y, scores)
 
-        internal_obj = {"overall": None, "specific": data_dicts}
+        internal_obj = {"overall": None, "specific": data_dicts, "mli": [
+                {
+                    "explanation_type": "ebm_local",
+                    "value": {
+                        "scores": self.attribute_set_models_,
+                        "intercept": self.intercept_,
+                        "perf": perf_list,
+                    },
+                }
+            ],
+        }
+        internal_obj["mli"].append(
+            {
+                "explanation_type": "evaluation_dataset",
+                "value": {"dataset_x": X, "dataset_y": y},
+            }
+        )
 
         return EBMExplanation(
             "local",