Skip to content
This repository was archived by the owner on Jul 12, 2024. It is now read-only.

Commit 2217646

Browse files
authored
Merge pull request #313 from credo-ai/bugfix/equity
Bugfix/equity
2 parents 8a708b1 + c752c45 commit 2217646

File tree

9 files changed

+51
-16
lines changed

9 files changed

+51
-16
lines changed

credoai/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,6 @@
33
"""
44

55
from credoai.utils.version_check import validate_version
6-
from connect._version import __version__
6+
from credoai._version import __version__
77

88
validate_version()

credoai/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
# 1) we don't load dependencies by storing it in __init__.py
33
# 2) we can import it in setup.py for the same reason
44
# 3) we can import it into your module
5-
__version__ = "1.1.4"
5+
__version__ = "1.1.5"

credoai/artifacts/data/base_data.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import pandas as pd
88

9+
from credoai.utils import global_logger
910
from credoai.utils.common import ValidationError, check_pandas
1011
from credoai.utils.model_utils import type_of_target
1112

@@ -217,9 +218,27 @@ def _validate_processed_y(self):
217218
def _validate_processed_sensitive(self):
218219
"""Validation of processed sensitive features"""
219220
for col_name, col in self.sensitive_features.iteritems():
221+
# validate unique
220222
unique_values = col.unique()
221223
if len(unique_values) == 1:
222224
raise ValidationError(
223225
f"Sensitive Feature column {col_name} must have more "
224226
f"than one unique value. Only found one value: {unique_values[0]}"
225227
)
228+
# validate number in each group
229+
for group, value in col.value_counts().iteritems():
230+
if value < 10:
231+
global_logger.warning(
232+
f"Dataset Issue! Very few ({value}) records were found for {group} under sensitive feature {col_name}."
233+
)
234+
# validate variance in y
235+
if self.y is not None:
236+
y = pd.DataFrame(self.y)
237+
for outcome, outcome_col in y.iteritems():
238+
for group, value in outcome_col.groupby(col).std().iteritems():
239+
if value == 0:
240+
global_logger.warning(
241+
"%s\n%s",
242+
f"Dataset Issue! Zero variance in the outcome ({outcome}) detected for {group} under sensitive feature {col_name}.",
243+
"\tDownstream evaluators may fail or not perform as expected.",
244+
)

credoai/artifacts/model/classification_model.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def _validate_framework(self):
6969
except:
7070
message = """Provided model is from unsupported framework.
7171
Lens behavior has not been tested or assured with unsupported modeling frameworks."""
72-
global_logger.warning(message, message)
72+
global_logger.warning(message)
7373

7474
def __post_init__(self):
7575
"""Conditionally updates functionality based on framework"""
@@ -92,9 +92,15 @@ def __post_init__(self):
9292
if self.model_like.layers[-1].output_shape == (None, 1):
9393
# Assumes sigmoid -> probabilities need to be rounded
9494
self.__dict__["predict"] = lambda x: pred_func(x).round()
95+
# Single-output sigmoid is binary by definition
96+
self.type = "BINARY_CLASSIFICATION"
9597
else:
9698
# Assumes softmax -> probabilities need to be argmaxed
9799
self.__dict__["predict"] = lambda x: np.argmax(pred_func(x), axis=1)
100+
if self.model_like.layers[-1].output_shape[1] == 2:
101+
self.type = "BINARY_CLASSIFICATION"
102+
else:
103+
self.type = "MULTICLASS_CLASSIFICATION"
98104

99105
if self.model_like.layers[-1].output_shape == (None, 2):
100106
self.__dict__["predict_proba"] = lambda x: pred_func(x)[:, 1]
@@ -117,11 +123,16 @@ def __post_init__(self):
117123

118124
elif self.model_info["framework"] == "credoai":
119125
# Functionality for DummyClassifier
120-
self.model_like = getattr(self.model_like, "model_like", None)
126+
if self.model_like.model_like is not None:
127+
self.model_like = self.model_like.model_like
121128
# If the dummy model has a model_like specified, reassign
122129
# the classifier's model_like attribute to match the dummy's
123130
# so that downstream evaluators (ModelProfiler) can use it
124131

132+
self.type = self.model_like.type
133+
# DummyClassifier model type is set in the constructor based on whether it
134+
# is binary or multiclass
135+
125136
# Predict and Predict_Proba should already be specified
126137

127138

@@ -141,6 +152,13 @@ class DummyClassifier:
141152
model_like : model_like, optional
142153
While predictions are pre-computed, the model object, itself, may be of use for
143154
some evaluations (e.g. ModelProfiler).
155+
binary_clf : bool, optional, default = True
156+
Type of classification model.
157+
Used when wrapping with ClassificationModel.
158+
If binary_clf == True, ClassificationModel.type will be set to 'BINARY_CLASSIFICATION',
159+
which enables use of binary metrics.
160+
If binary_clf == False, ClassificationModel.type will be set to 'MULTICLASS_CLASSIFICATION',
161+
which enables use of multiclass metrics.
144162
predict_output : array, optional
145163
Array containing per-sample class labels
146164
Corresponds to sklearn-like `predict` output
@@ -158,6 +176,7 @@ def __init__(
158176
self,
159177
name: str,
160178
model_like=None,
179+
binary_clf=True,
161180
predict_output=None,
162181
predict_proba_output=None,
163182
tags=None,
@@ -167,6 +186,9 @@ def __init__(
167186
self._build_functionality("predict_proba", predict_proba_output)
168187
self.name = name
169188
self.tags = tags
189+
self.type = (
190+
"BINARY_CLASSIFICATION" if binary_clf else "MULTICLASS_CLASSIFICATION"
191+
)
170192

171193
def _wrap_array(self, array):
172194
return lambda X=None: array

credoai/evaluators/equity.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import numpy as np
22
import pandas as pd
3-
from connect.evidence import MetricContainer, TableContainer, StatisticTestContainer
3+
from connect.evidence import MetricContainer, StatisticTestContainer, TableContainer
44

55
from credoai.artifacts import TabularData
66
from credoai.evaluators import Evaluator
@@ -142,6 +142,7 @@ def _get_formatted_stats(self) -> tuple:
142142
"test_statistic": statistics["statistic"],
143143
"p_value": statistics["pvalue"],
144144
"significance_threshold": self.pvalue,
145+
"significant": statistics["pvalue"] <= self.pvalue,
145146
}
146147

147148
overall_equity = StatisticTestContainer(

credoai/evaluators/fairness.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ def __init__(
5555

5656
def _validate_arguments(self):
5757
check_existence(self.metrics, "metrics")
58-
check_existence(self.data.X, "X")
5958
check_existence(self.data.y, "y")
6059
check_data_for_nulls(
6160
self.data, "Data", check_X=True, check_y=True, check_sens=True

credoai/evaluators/performance.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,7 @@ def __init__(self, metrics=None):
5555

5656
def _validate_arguments(self):
5757
check_existence(self.metrics, "metrics")
58-
check_existence(self.assessment_data.X)
59-
check_existence(self.assessment_data.y)
58+
check_existence(self.assessment_data.y, "y")
6059
check_data_for_nulls(
6160
self.assessment_data, "Data", check_X=True, check_y=True, check_sens=False
6261
)

credoai/modules/metric_utils.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,9 @@
11
import textwrap
22
from collections import defaultdict
33

4+
from credoai.modules.metrics import ALL_METRICS, MODEL_METRIC_CATEGORIES
45
from pandas import DataFrame
56

6-
from credoai.modules.metrics import (
7-
ALL_METRICS,
8-
METRIC_CATEGORIES,
9-
METRIC_NAMES,
10-
MODEL_METRIC_CATEGORIES,
11-
)
12-
137

148
def list_metrics(verbose=True):
159
metrics = defaultdict(set)

docs/notebooks/quickstart.ipynb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
{
22
"cells": [
33
{
4+
"attachments": {},
45
"cell_type": "markdown",
56
"id": "5d608c2a",
67
"metadata": {
@@ -15,7 +16,7 @@
1516
"\n",
1617
"**Setup**\n",
1718
"\n",
18-
"Lens installation instruction can be found on [readthedocs](https://credoai-lens.readthedocs.io/en/stable/setup.html)\n",
19+
"Lens installation instruction can be found on [readthedocs](https://credoai-lens.readthedocs.io/en/stable/pages/setup.html)\n",
1920
"\n",
2021
"**Find the code**"
2122
]

0 commit comments

Comments
 (0)