Skip to content
This repository was archived by the owner on Jul 12, 2024. It is now read-only.

Commit 2217646

Browse files
authored
Merge pull request #313 from credo-ai/bugfix/equity
Bugfix/equity
2 parents 8a708b1 + c752c45 commit 2217646

File tree

9 files changed

+51
-16
lines changed

9 files changed

+51
-16
lines changed

credoai/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,6 @@
33
"""
44

55
from credoai.utils.version_check import validate_version
6-
from connect._version import __version__
6+
from credoai._version import __version__
77

88
validate_version()

credoai/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
# 1) we don't load dependencies by storing it in __init__.py
33
# 2) we can import it in setup.py for the same reason
44
# 3) we can import it into your module
5-
__version__ = "1.1.4"
5+
__version__ = "1.1.5"

credoai/artifacts/data/base_data.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import pandas as pd
88

9+
from credoai.utils import global_logger
910
from credoai.utils.common import ValidationError, check_pandas
1011
from credoai.utils.model_utils import type_of_target
1112

@@ -217,9 +218,27 @@ def _validate_processed_y(self):
217218
def _validate_processed_sensitive(self):
218219
"""Validation of processed sensitive features"""
219220
for col_name, col in self.sensitive_features.iteritems():
221+
# validate unique
220222
unique_values = col.unique()
221223
if len(unique_values) == 1:
222224
raise ValidationError(
223225
f"Sensitive Feature column {col_name} must have more "
224226
f"than one unique value. Only found one value: {unique_values[0]}"
225227
)
228+
# validate number in each group
229+
for group, value in col.value_counts().iteritems():
230+
if value < 10:
231+
global_logger.warning(
232+
f"Dataset Issue! Very few ({value}) records were found for {group} under sensitive feature {col_name}."
233+
)
234+
# validate variance in y
235+
if self.y is not None:
236+
y = pd.DataFrame(self.y)
237+
for outcome, outcome_col in y.iteritems():
238+
for group, value in outcome_col.groupby(col).std().iteritems():
239+
if value == 0:
240+
global_logger.warning(
241+
"%s\n%s",
242+
f"Dataset Issue! Zero variance in the outcome ({outcome}) detected for {group} under sensitive feature {col_name}.",
243+
"\tDownstream evaluators may fail or not perform as expected.",
244+
)

credoai/artifacts/model/classification_model.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def _validate_framework(self):
6969
except:
7070
message = """Provided model is from unsupported framework.
7171
Lens behavior has not been tested or assured with unsupported modeling frameworks."""
72-
global_logger.warning(message, message)
72+
global_logger.warning(message)
7373

7474
def __post_init__(self):
7575
"""Conditionally updates functionality based on framework"""
@@ -92,9 +92,15 @@ def __post_init__(self):
9292
if self.model_like.layers[-1].output_shape == (None, 1):
9393
# Assumes sigmoid -> probabilities need to be rounded
9494
self.__dict__["predict"] = lambda x: pred_func(x).round()
95+
# Single-output sigmoid is binary by definition
96+
self.type = "BINARY_CLASSIFICATION"
9597
else:
9698
# Assumes softmax -> probabilities need to be argmaxed
9799
self.__dict__["predict"] = lambda x: np.argmax(pred_func(x), axis=1)
100+
if self.model_like.layers[-1].output_shape[1] == 2:
101+
self.type = "BINARY_CLASSIFICATION"
102+
else:
103+
self.type = "MULTICLASS_CLASSIFICATION"
98104

99105
if self.model_like.layers[-1].output_shape == (None, 2):
100106
self.__dict__["predict_proba"] = lambda x: pred_func(x)[:, 1]
@@ -117,11 +123,16 @@ def __post_init__(self):
117123

118124
elif self.model_info["framework"] == "credoai":
119125
# Functionality for DummyClassifier
120-
self.model_like = getattr(self.model_like, "model_like", None)
126+
if self.model_like.model_like is not None:
127+
self.model_like = self.model_like.model_like
121128
# If the dummy model has a model_like specified, reassign
122129
# the classifier's model_like attribute to match the dummy's
123130
# so that downstream evaluators (ModelProfiler) can use it
124131

132+
self.type = self.model_like.type
133+
# DummyClassifier model type is set in the constructor based on whether it
134+
# is binary or multiclass
135+
125136
# Predict and Predict_Proba should already be specified
126137

127138

@@ -141,6 +152,13 @@ class DummyClassifier:
141152
model_like : model_like, optional
142153
While predictions are pre-computed, the model object, itself, may be of use for
143154
some evaluations (e.g. ModelProfiler).
155+
binary_clf : bool, optional, default = True
156+
Type of classification model.
157+
Used when wrapping with ClassificationModel.
158+
If binary_clf == True, ClassificationModel.type will be set to 'BINARY_CLASSIFICATION',
159+
which enables use of binary metrics.
160+
If binary_clf == False, ClassificationModel.type will be set to 'MULTICLASS_CLASSIFICATION',
161+
which enables use of multiclass metrics.
144162
predict_output : array, optional
145163
Array containing per-sample class labels
146164
Corresponds to sklearn-like `predict` output
@@ -158,6 +176,7 @@ def __init__(
158176
self,
159177
name: str,
160178
model_like=None,
179+
binary_clf=True,
161180
predict_output=None,
162181
predict_proba_output=None,
163182
tags=None,
@@ -167,6 +186,9 @@ def __init__(
167186
self._build_functionality("predict_proba", predict_proba_output)
168187
self.name = name
169188
self.tags = tags
189+
self.type = (
190+
"BINARY_CLASSIFICATION" if binary_clf else "MULTICLASS_CLASSIFICATION"
191+
)
170192

171193
def _wrap_array(self, array):
172194
return lambda X=None: array

credoai/evaluators/equity.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import numpy as np
22
import pandas as pd
3-
from connect.evidence import MetricContainer, TableContainer, StatisticTestContainer
3+
from connect.evidence import MetricContainer, StatisticTestContainer, TableContainer
44

55
from credoai.artifacts import TabularData
66
from credoai.evaluators import Evaluator
@@ -142,6 +142,7 @@ def _get_formatted_stats(self) -> tuple:
142142
"test_statistic": statistics["statistic"],
143143
"p_value": statistics["pvalue"],
144144
"significance_threshold": self.pvalue,
145+
"significant": statistics["pvalue"] <= self.pvalue,
145146
}
146147

147148
overall_equity = StatisticTestContainer(

credoai/evaluators/fairness.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ def __init__(
5555

5656
def _validate_arguments(self):
5757
check_existence(self.metrics, "metrics")
58-
check_existence(self.data.X, "X")
5958
check_existence(self.data.y, "y")
6059
check_data_for_nulls(
6160
self.data, "Data", check_X=True, check_y=True, check_sens=True

credoai/evaluators/performance.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,7 @@ def __init__(self, metrics=None):
5555

5656
def _validate_arguments(self):
5757
check_existence(self.metrics, "metrics")
58-
check_existence(self.assessment_data.X)
59-
check_existence(self.assessment_data.y)
58+
check_existence(self.assessment_data.y, "y")
6059
check_data_for_nulls(
6160
self.assessment_data, "Data", check_X=True, check_y=True, check_sens=False
6261
)

credoai/modules/metric_utils.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,9 @@
11
import textwrap
22
from collections import defaultdict
33

4+
from credoai.modules.metrics import ALL_METRICS, MODEL_METRIC_CATEGORIES
45
from pandas import DataFrame
56

6-
from credoai.modules.metrics import (
7-
ALL_METRICS,
8-
METRIC_CATEGORIES,
9-
METRIC_NAMES,
10-
MODEL_METRIC_CATEGORIES,
11-
)
12-
137

148
def list_metrics(verbose=True):
159
metrics = defaultdict(set)

docs/notebooks/quickstart.ipynb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
{
22
"cells": [
33
{
4+
"attachments": {},
45
"cell_type": "markdown",
56
"id": "5d608c2a",
67
"metadata": {
@@ -15,7 +16,7 @@
1516
"\n",
1617
"**Setup**\n",
1718
"\n",
18-
"Lens installation instruction can be found on [readthedocs](https://credoai-lens.readthedocs.io/en/stable/setup.html)\n",
19+
"Lens installation instruction can be found on [readthedocs](https://credoai-lens.readthedocs.io/en/stable/pages/setup.html)\n",
1920
"\n",
2021
"**Find the code**"
2122
]

0 commit comments

Comments
 (0)