From 2af75621e0cc36ce3daf371e25ab0e4a17927297 Mon Sep 17 00:00:00 2001 From: Praveenkumar Date: Sat, 23 Dec 2023 12:05:04 +0530 Subject: [PATCH 1/4] Update explainer_base.py Bug fix to resolve "ValueError: ('Feature', {}, 'has a value outside the dataset.')" caused by the 'genetic' method when it is used with private data and more than one query instance Signed-off-by: Praveenkumar --- dice_ml/explainer_interfaces/explainer_base.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/dice_ml/explainer_interfaces/explainer_base.py b/dice_ml/explainer_interfaces/explainer_base.py index 349d1959..811126d1 100644 --- a/dice_ml/explainer_interfaces/explainer_base.py +++ b/dice_ml/explainer_interfaces/explainer_base.py @@ -250,12 +250,7 @@ def setup(self, features_to_vary, permitted_range, query_instance, feature_weigh if features_to_vary == 'all': features_to_vary = self.data_interface.feature_names - if permitted_range is None: # use the precomputed default - self.feature_range = self.data_interface.permitted_range - feature_ranges_orig = self.feature_range - else: # compute the new ranges based on user input - self.feature_range, feature_ranges_orig = self.data_interface.get_features_range(permitted_range) - + self.feature_range, feature_ranges_orig = self.data_interface.get_features_range(permitted_range) self.check_query_instance_validity(features_to_vary, permitted_range, query_instance, feature_ranges_orig) return features_to_vary From fb2a5db8451610cfecac1b151f94c647ee777741 Mon Sep 17 00:00:00 2001 From: Praveenkumar Date: Sat, 23 Dec 2023 12:38:52 +0530 Subject: [PATCH 2/4] Update private_data_interface.py Remove duplicate code Signed-off-by: Praveenkumar --- dice_ml/data_interfaces/private_data_interface.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/dice_ml/data_interfaces/private_data_interface.py b/dice_ml/data_interfaces/private_data_interface.py index 3fd36856..e48c105d 100644 --- 
a/dice_ml/data_interfaces/private_data_interface.py +++ b/dice_ml/data_interfaces/private_data_interface.py @@ -182,11 +182,7 @@ def get_features_range(self, permitted_range_input=None, features_dict=None): ranges = {} # Getting default ranges based on the dataset - for feature in features_dict: - if type(features_dict[feature][0]) is int: # continuous feature - ranges[feature] = features_dict[feature] - else: - ranges[feature] = features_dict[feature] + ranges[feature] = features_dict[feature] feature_ranges_orig = ranges.copy() # Overwriting the ranges for a feature if input provided if permitted_range_input is not None: From bf085e22067e1cde1e89f8a055f76bae5a3eb79e Mon Sep 17 00:00:00 2001 From: Praveenkumar Date: Mon, 1 Jan 2024 20:48:41 +0530 Subject: [PATCH 3/4] Update conftest.py Add test query dataset and model Signed-off-by: Praveenkumar --- tests/conftest.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index fdc36b1d..0498b6bf 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -411,6 +411,34 @@ def _load_custom_vars_dataset_model(): return model +def _load_adult_income_binary_model(): + dataset = helpers.load_adult_income_dataset() + X_train = dataset.drop('income', axis=1) + y_train = dataset["income"] + num_feature_names = ["age", "hours_per_week"] + cat_feature_names = X_train.columns.difference(num_feature_names) + model = create_complex_classification_pipeline( + X_train, y_train, num_feature_names, cat_feature_names) + return model + + +def sample_adult_income_custom_query_11(): + """ + Returns multiple query instance for adult income dataset + """ + data_point = 2 + query_instances = pd.DataFrame({'age': [22]*data_point, + 'workclass': ['Private']*data_point, + 'education': ['HS-grad']*data_point, + 'marital_status': ['Single']*data_point, + 'occupation': ['Service']*data_point, + 'race': ['White']*data_point, + 'gender': ['Female']*data_point, + 'hours_per_week': 
[45]*data_point}, + index=list(range(data_point))) + return query_instances + + @pytest.fixture(scope='session') def sample_adultincome_query(): """ From 73eee99d3f570134757a06a7f18809e8bdff1f37 Mon Sep 17 00:00:00 2001 From: Praveenkumar Date: Mon, 1 Jan 2024 20:49:08 +0530 Subject: [PATCH 4/4] Update test_explainer_base.py Add test case for the fix Signed-off-by: Praveenkumar --- .../test_explainer_base.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/tests/test_dice_interface/test_explainer_base.py b/tests/test_dice_interface/test_explainer_base.py index e3b63892..e7386338 100644 --- a/tests/test_dice_interface/test_explainer_base.py +++ b/tests/test_dice_interface/test_explainer_base.py @@ -12,7 +12,10 @@ from dice_ml.explainer_interfaces.explainer_base import ExplainerBase from dice_ml.utils import helpers -from ..conftest import _load_custom_testing_binary_model +from ..conftest import (private_data_object, + sample_adult_income_custom_query_11, + _load_adult_income_binary_model, + _load_custom_testing_binary_model) @pytest.mark.parametrize("method", ['random', 'genetic', 'kdtree']) @@ -349,6 +352,20 @@ def test_cfs_type_consistency( assert cf_explanations.cf_examples_list[0].final_cfs_df[col].dtype == sample_custom_query[col].dtype if cf_explanations.cf_examples_list[0].final_cfs_df_sparse is not None: assert cf_explanations.cf_examples_list[0].final_cfs_df_sparse[col].dtype == sample_custom_query[col].dtype + + @pytest.mark.parametrize("method", ["genetic"]) + def test_genetic_private_data(method): + d = private_data_object() + query = sample_adult_income_custom_query_11() + model = _load_adult_income_binary_model() + m = dice_ml.Model(model=model, backend='sklearn') + exp = dice_ml.Dice(d, m, method=method) + + return exp.generate_counterfactuals( + query_instances=query, + total_CFs=1, + desired_class="opposite", + initialization="random") @pytest.mark.parametrize("method", ['random', 'genetic', 'kdtree'])