from typing import Dict, Optional

import numpy as np
import pandas as pd

from methods.api import RecourseMethod
from methods.processing import merge_default_parameters
from models.api import MLModel

# Import from our library folder
from .library.autoencoder import AutoEncoder
from .library.data import data_NICE
from .library.distance import HEOM, MinMaxDistance, NearestNeighbour
from .library.heuristic import best_first
from .library.reward import PlausibilityReward, ProximityReward, SparsityReward
15+
16+
class NICE(RecourseMethod):
    """
    NICE: Nearest Instance Counterfactual Explanations.

    Implementation of the NICE algorithm from:
    Brughmans et al. (2024) "NICE: an algorithm for nearest instance
    counterfactual explanations"

    NICE is model-agnostic: it only needs the black-box's
    ``predict_proba``, so every backend supported by ``MLModel`` works.

    Parameters
    ----------
    mlmodel : MLModel
        Black-box classifier.
    hyperparams : dict, optional
        - "optimization": str, default: "sparsity"
          One of ["none", "sparsity", "proximity", "plausibility"]
        - "distance_metric": str, default: "HEOM"
          Distance metric to use
        - "num_normalization": str, default: "minmax"
          Normalization for numerical features ("minmax" or "std")
        - "justified_cf": bool, default: True
          If True, only use correctly classified training instances
    """

    _DEFAULT_HYPERPARAMS = {
        "optimization": "sparsity",
        "distance_metric": "HEOM",
        "num_normalization": "minmax",
        "justified_cf": True,
    }

    def __init__(self, mlmodel: MLModel, hyperparams: Optional[Dict] = None):
        # No backend check: NICE is model-agnostic, so all backends are
        # accepted.
        super().__init__(mlmodel)

        # Fill in defaults for any hyperparameter the caller omitted.
        checked_hyperparams = merge_default_parameters(
            hyperparams, self._DEFAULT_HYPERPARAMS
        )
        self.optimization = checked_hyperparams["optimization"]
        self.distance_metric_name = checked_hyperparams["distance_metric"]
        self.num_normalization = checked_hyperparams["num_normalization"]
        self.justified_cf = checked_hyperparams["justified_cf"]

        # Training data as raw numpy arrays, as the NICE library expects.
        # NOTE(review): the target column name "y" is hard-coded here and in
        # get_counterfactuals — confirm the data catalog does not expose the
        # target name (e.g. ``mlmodel.data.target``) instead.
        df_train = mlmodel.data.df_train
        X_train = df_train.drop(columns=["y"]).values
        y_train = df_train["y"].values

        # Feature information from the benchmark's data catalog.
        self.categorical_features = mlmodel.data.categorical
        self.continuous_features = mlmodel.data.continuous

        # NICE addresses features positionally, so translate names -> column
        # indices in training order.
        feature_names = df_train.drop(columns=["y"]).columns.tolist()
        self.cat_feat_idx = [feature_names.index(f) for f in self.categorical_features]
        self.num_feat_idx = [feature_names.index(f) for f in self.continuous_features]

        self.feature_names = feature_names
        self.mlmodel = mlmodel

        # NICE-internal view of the training data; predictions are routed
        # through the numpy<->DataFrame wrapper below.
        self.data = data_NICE(
            X_train=X_train,
            y_train=y_train,
            cat_feat=self.cat_feat_idx,
            num_feat=self.num_feat_idx,
            predict_fn=self._predict_fn_wrapper,
            justified_cf=self.justified_cf,
            eps=1e-10,
        )

        # MinMaxDistance is already imported at module level; only the
        # std-normalized variant needs a lazy import.
        if self.num_normalization == "minmax":
            NumDistance = MinMaxDistance
        else:
            from .library.distance import StandardDistance as NumDistance

        self.distance_metric = HEOM(self.data, NumDistance)
        self.nearest_neighbour = NearestNeighbour(self.data, self.distance_metric)

        # With "none" the nearest unlike neighbour is returned as-is and no
        # optimizer is needed.
        if self.optimization != "none":
            self.reward_function = self._build_reward_function(X_train)
            self.optimizer = best_first(self.data, self.reward_function)

    def _build_reward_function(self, X_train: np.ndarray):
        """Construct the reward function selected by ``self.optimization``.

        Raises
        ------
        ValueError
            If ``self.optimization`` is not a known strategy.
        """
        if self.optimization == "sparsity":
            return SparsityReward(self.data)
        if self.optimization == "proximity":
            return ProximityReward(self.data, distance_metric=self.distance_metric)
        if self.optimization == "plausibility":
            # Plausibility is scored via an autoencoder trained on the
            # training data (reconstruction error as a density proxy).
            ae = AutoEncoder(X_train, self.cat_feat_idx, self.num_feat_idx)
            return PlausibilityReward(self.data, auto_encoder=ae)
        raise ValueError(f"Unknown optimization: {self.optimization}")

    def _predict_fn_wrapper(self, X):
        """
        Adapt numpy input to ``mlmodel.predict_proba``, which expects a
        DataFrame with named columns.
        """
        df = pd.DataFrame(X, columns=self.feature_names)
        return self.mlmodel.predict_proba(df)

    def get_counterfactuals(self, factuals: pd.DataFrame) -> pd.DataFrame:
        """
        Generate counterfactual explanations for the given factuals.

        Parameters
        ----------
        factuals : pd.DataFrame
            Instances to explain (a 'y' target column, if present, is
            dropped).

        Returns
        -------
        pd.DataFrame
            One counterfactual row per factual, columns ordered for the
            model. Empty (with feature columns) if ``factuals`` is empty.
        """
        counterfactuals = []

        for _, row in factuals.iterrows():
            # Strip the target column when the caller included it.
            factual = row.drop("y") if "y" in row.index else row

            # NICE operates on a (1, n_features) numpy row.
            x = factual.values.reshape(1, -1)

            # Bind the data object to this instance; 'other' targets any
            # class different from the factual's predicted one.
            self.data.fit_to_X(x, target_class="other")

            # Nearest unlike neighbour is the starting counterfactual.
            nn = self.nearest_neighbour.find_neighbour(self.data.X)

            cf = nn if self.optimization == "none" else self.optimizer.optimize(nn)
            counterfactuals.append(pd.DataFrame(cf, columns=self.feature_names))

        # pd.concat raises on an empty list — return an empty frame with the
        # expected columns instead of crashing on empty input.
        if counterfactuals:
            df_cfs = pd.concat(counterfactuals, ignore_index=True)
        else:
            df_cfs = pd.DataFrame(columns=self.feature_names)

        # Re-order columns to match the model's expected feature order.
        # (Uses self.mlmodel, set in __init__, rather than the superclass's
        # private _mlmodel attribute.)
        return self.mlmodel.get_ordered_features(df_cfs)