Skip to content
This repository was archived by the owner on Jul 12, 2024. It is now read-only.

Commit 65528aa

Browse files
authored
Merge pull request #218 from credo-ai/release/1.0.1
Release/1.0.1
2 parents f73d4bf + 27e077c commit 65528aa

Some content is hidden

Large commits have some of their content hidden by default. Use the search box below to find content that may be hidden.

48 files changed

+2132
-262
lines changed

credoai/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
Primary interface for Credo AI Lens package
33
"""
44

5-
__version__ = "1.0.0"
5+
__version__ = "1.0.1"

credoai/artifacts/data/tabular_data.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,9 @@ class TabularData(Data):
2525
Outcome
2626
sensitive_features : pd.Series, pd.DataFrame, optional
2727
Sensitive Features, which will be used for disaggregating performance
28-
metrics. This can be the columns you want to perform segmentation analysis on, or
29-
a feature related to fairness like 'race' or 'gender'
28+
metrics. This can be the feature you want to perform segmentation analysis on, or
29+
a feature related to fairness like 'race' or 'gender'. Sensitive Features *must*
30+
be categorical features.
3031
sensitive_intersections : bool, list
3132
Whether to add intersections of sensitive features. If True, add all possible
3233
intersections. If list, only create intersections from specified sensitive features.

credoai/evaluators/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,14 @@
44

55
from .evaluator import Evaluator
66
from .data_fairness import DataFairness
7-
from .data_profiling import DataProfiling
7+
from .data_profiler import DataProfiler
88
from .privacy import Privacy
99
from .security import Security
1010
from .equity import DataEquity, ModelEquity
1111
from .performance import Performance
1212
from .fairness import ModelFairness
1313
from .ranking_fairness import RankingFairness
1414
from .survival_fairness import SurvivalFairness
15+
from .shap import ShapExplainer
16+
from .model_profiler import ModelProfiler
17+
from .feature_drift import FeatureDrift

credoai/evaluators/data_fairness.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ def __init__(
6767
self.categorical_threshold = categorical_threshold
6868
super().__init__()
6969

70-
name = "DataFairness"
7170
required_artifacts = {"data", "sensitive_feature"}
7271

7372
def _setup(self):
Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
matplotlib.use(backend)
1616

1717

18-
class DataProfiling(Evaluator):
18+
class DataProfiler(Evaluator):
1919
"""Data profiling module for Credo AI.
2020
2121
This evaluator runs the pandas profiler on a data. Pandas profiler calculates a number
@@ -31,7 +31,6 @@ class DataProfiling(Evaluator):
3131
Passed to pandas_profiling.ProfileReport
3232
"""
3333

34-
name = "DataProfiler"
3534
required_artifacts = {"data"}
3635

3736
def __init__(self, dataset_name=None, **profile_kwargs):
@@ -40,14 +39,11 @@ def __init__(self, dataset_name=None, **profile_kwargs):
4039
super().__init__()
4140

4241
def _setup(self):
43-
self.data_to_eval = self.data
44-
45-
self.data = pd.concat([self.data_to_eval.X, self.data_to_eval.y], axis=1)
42+
self.data_to_profile = pd.concat([self.data.X, self.data.y], axis=1)
4643
return self
4744

4845
def _validate_arguments(self):
4946
check_data_instance(self.data, TabularData)
50-
5147
return self
5248

5349
def get_html_report(self):
@@ -67,4 +63,4 @@ def evaluate(self):
6763
def _create_reporter(self):
6864
default_kwargs = {"title": "Dataset", "minimal": True}
6965
default_kwargs.update(self.profile_kwargs)
70-
return ProfileReport(self.data, **default_kwargs)
66+
return ProfileReport(self.data_to_profile, **default_kwargs)

credoai/evaluators/equity.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@ class DataEquity(Evaluator):
4141
The significance value to evaluate statistical tests
4242
"""
4343

44-
name = "DataEquity"
4544
required_artifacts = {"data", "sensitive_feature"}
4645

4746
def __init__(self, p_value=0.01):
@@ -324,7 +323,6 @@ def __init__(self, use_predict_proba=False, p_value=0.01):
324323
self.use_predict_proba = use_predict_proba
325324
super().__init__(p_value)
326325

327-
name = "ModelEquity"
328326
required_artifacts = {"model", "assessment_data", "sensitive_feature"}
329327

330328
def _setup(self):

credoai/evaluators/evaluator.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@ def __init__(self):
1717
self._results = None
1818
self.artifact_keys = []
1919
self.logger = global_logger
20+
self.metadata = {}
21+
22+
@property
23+
def name(self):
24+
return self.__class__.__name__
2025

2126
@property
2227
def results(self):
@@ -36,12 +41,6 @@ def results(self, results):
3641
raise ValidationError("All results must be EvidenceContainers")
3742
self._results = results
3843

39-
@property
40-
@abstractmethod
41-
def name(self):
42-
"""Used to define a unique identifier for the specific evaluator"""
43-
pass
44-
4544
@property
4645
@abstractmethod
4746
def required_artifacts(self):
@@ -106,11 +105,20 @@ def get_container_info(self, labels: dict = None, metadata: dict = None):
106105
return info
107106

108107
def _base_container_info(self):
109-
return {"labels": {"evaluator": self.name}, "metadata": self._get_artifacts()}
108+
meta = {**self.metadata, **self._get_artifacts()}
109+
labels = {"evaluator": self.name}
110+
if "dataset_type" in meta:
111+
labels["dataset_type"] = meta["dataset_type"]
112+
return {"labels": labels, "metadata": meta}
110113

111114
def _get_artifacts(self):
112115
artifacts = {}
113-
save_keys = {"model": "model_name"}
116+
save_keys = {
117+
"model": "model_name",
118+
"data": "data_name",
119+
"assessment_data": "assessment_data_name",
120+
"training_data": "training_data_name",
121+
}
114122
for k in self.artifact_keys:
115123
save_key = save_keys.get(k, k)
116124
try:

credoai/evaluators/fairness.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@ def __init__(
6262
self.fairness_prob_metrics = None
6363
super().__init__()
6464

65-
name = "ModelFairness"
6665
required_artifacts = {"model", "data", "sensitive_feature"}
6766

6867
def _setup(self):
@@ -105,11 +104,9 @@ def evaluate(self):
105104

106105
if disaggregated_thresh_results is not None:
107106
for key, df in disaggregated_thresh_results.items():
108-
df.name = key
107+
labels = {**sens_feat_label, **{"metric_type": key}}
109108
self._results.append(
110-
TableContainer(
111-
df, **self.get_container_info(labels=sens_feat_label)
112-
)
109+
TableContainer(df, **self.get_container_info(labels=labels))
113110
)
114111

115112
return self
@@ -198,12 +195,15 @@ def get_disaggregated_threshold_performance(self):
198195
var_name="type",
199196
)
200197

201-
to_return = defaultdict(pd.DataFrame)
198+
to_return = defaultdict(list)
202199
for i, row in df.iterrows():
203-
label = f'{row["type"]}_disaggregated_performance'
204200
tmp_df = row["value"]
205201
tmp_df = tmp_df.assign(**row.drop("value"))
206-
to_return[label] = pd.concat([to_return[label], tmp_df])
202+
to_return[row["type"]].append(tmp_df)
203+
for key in to_return.keys():
204+
df = pd.concat(to_return[key])
205+
df.name = "threshold_dependent_disaggregated_performance"
206+
to_return[key] = df
207207
return to_return
208208

209209
def get_fairness_results(self):
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
"""Feature Drift evaluator"""
2+
from credoai.artifacts import ClassificationModel
3+
from credoai.evaluators import Evaluator
4+
from credoai.evaluators.utils.validation import check_requirements_existence
5+
from credoai.evidence import MetricContainer
6+
from credoai.evidence.containers import TableContainer
7+
from credoai.modules.credoai_metrics import population_stability_index
8+
from pandas import DataFrame, Series
9+
10+
11+
class FeatureDrift(Evaluator):
12+
"""
13+
Measure Feature Drift using population stability index.
14+
15+
This evaluator measures feature drift in:
16+
17+
1. Model prediction: the prediction for the assessment dataset is compared
18+
to the prediction for the training dataset.
19+
In the case of classifiers, the prediction is performed with predict proba if available.
20+
If it is not available, the prediction is treated like a categorical variable, see the
21+
processing of categorical variables in the item below.
22+
23+
2. Dataset features: 1 to 1 comparison across all features for the datasets. This is also
24+
referred to as "characteristic stability index" (CSI).
25+
- Numerical features are directly fed into the population_stability_index metric, and
26+
binned according to the parameters specified at init time.
27+
- Categorical features percentage distribution is manually calculated. The % amount of
28+
samples per each class is calculated and then fed into the population_stability_index metric.
29+
The percentage flag in the metric is set to True, to bypass the internal binning process.
30+
31+
32+
Parameters
33+
----------
34+
buckets : int, optional
35+
Number of buckets to consider to bin the predictions, by default 10
36+
buckettype : Literal["bins", "quantiles"]
37+
Type of strategy for creating buckets, bins splits into even splits,
38+
quantiles splits into quantiles buckets, by default "bins"
39+
csi_calculation : bool, optional
40+
Calculate characteristic stability index, i.e., PSI for all features in the datasets,
41+
by default False
42+
"""
43+
44+
def __init__(self, buckets: int = 10, buckettype="bins", csi_calculation=False):
45+
46+
self.bucket_number = buckets
47+
self.buckettype = buckettype
48+
self.csi_calculation = csi_calculation
49+
self.percentage = False
50+
super().__init__()
51+
52+
required_artifacts = {"model", "assessment_data", "training_data"}
53+
54+
def _validate_arguments(self):
55+
check_requirements_existence(self)
56+
57+
def _setup(self):
58+
# Default prediction to predict method
59+
prediction_method = self.model.predict
60+
if isinstance(self.model, ClassificationModel):
61+
if hasattr(self.model, "predict_proba"):
62+
prediction_method = self.model.predict_proba
63+
else:
64+
self.percentage = True
65+
66+
self.expected_prediction = prediction_method(self.training_data.X)
67+
self.actual_prediction = prediction_method(self.assessment_data.X)
68+
69+
# Create the bins manually for categorical prediction if predict_proba
70+
# is not available.
71+
if self.percentage:
72+
(
73+
self.expected_prediction,
74+
self.actual_prediction,
75+
) = self._create_bin_percentage(
76+
self.expected_prediction, self.actual_prediction
77+
)
78+
79+
def evaluate(self):
80+
prediction_psi = self._calculate_psi_on_prediction()
81+
self.results = [MetricContainer(prediction_psi, **self.get_container_info())]
82+
if self.csi_calculation:
83+
csi = self._calculate_csi()
84+
self.results.append(TableContainer(csi, **self.get_container_info()))
85+
return self
86+
87+
def _calculate_psi_on_prediction(self) -> DataFrame:
88+
"""
89+
Calculate the psi index on the model prediction.
90+
91+
Returns
92+
-------
93+
DataFrame
94+
Formatted for metric container.
95+
"""
96+
psi = population_stability_index(
97+
self.expected_prediction,
98+
self.actual_prediction,
99+
percentage=self.percentage,
100+
buckets=self.bucket_number,
101+
buckettype=self.buckettype,
102+
)
103+
res = DataFrame({"value": psi, "type": "population_stability_index"}, index=[0])
104+
return res
105+
106+
def _calculate_csi(self) -> DataFrame:
107+
"""
108+
Calculate psi for all the columns in the dataframes.
109+
110+
Returns
111+
-------
112+
DataFrame
113+
Formatted for the table container.
114+
"""
115+
columns_names = list(self.assessment_data.X.columns)
116+
psis = {}
117+
for col_name in columns_names:
118+
train_data = self.training_data.X[col_name]
119+
assess_data = self.assessment_data.X[col_name]
120+
if self.assessment_data.X[col_name].dtype == "category":
121+
train, assess = self._create_bin_percentage(train_data, assess_data)
122+
psis[col_name] = population_stability_index(train, assess, True)
123+
else:
124+
psis[col_name] = population_stability_index(train_data, assess_data)
125+
psis = DataFrame.from_dict(psis, orient="index")
126+
psis = psis.reset_index()
127+
psis.columns = ["feature_names", "value"]
128+
psis.name = "Characteristic Stability Index"
129+
return psis
130+
131+
@staticmethod
132+
def _create_bin_percentage(train: Series, assess: Series) -> tuple:
133+
"""
134+
In case of categorical values proceed to count the instances
135+
of each class and divide by the total amount of samples to get
136+
the ratios.
137+
138+
Parameters
139+
----------
140+
train : Series
141+
Array of values, dtype == category
142+
assess : Series
143+
Array of values, dtype == category
144+
145+
Returns
146+
-------
147+
tuple
148+
Class percentages for both arrays
149+
"""
150+
len_training = len(train)
151+
len_assessment = len(assess)
152+
train_bin_perc = train.value_counts() / len_training
153+
assess_bin_perc = assess.value_counts() / len_assessment
154+
return train_bin_perc, assess_bin_perc

0 commit comments

Comments (0)