-
Notifications
You must be signed in to change notification settings - Fork 30
/
Copy pathcounterfactual_explain.py
261 lines (204 loc) · 8.81 KB
/
counterfactual_explain.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
#!/usr/bin/python3
"""
Counterfactual explanation of a user query.
Based on application on the knapsack problem of Korikov, A., & Beck, J. C. mming, CP2021. Counterfactual Explanations via Inverse Constraint Programming.
Usecase:
1) Some optimal solution x* is provided to the user by a constraint optimization solver.
2) User enters a query to change solution variables x_i to a new value.
The assigment of a subset of variables are called a foil.
This extra set of constraints is used to generate a new optimal solution x_d
3) Using inverse optimization, we calculate the values of constraints in the objective function which result in x_d being the optimal solution.
Extra constraint: only values corresponding to foil items are allowed to change.
4) The user is presented with the constraints which would have resulted in an optimal solution given the foil constraints.
Intuition:
Iteratively find new constraints vector d with minimal change from c.
Improve the set of constraints in every iteration until x_d is optimal for the constraints given the objective function.
Given:
A constraint optimization problem (c, f, X) with an optimal solution x*
A set of foil constraints assigning a value to variables in x*, resulting in x_d
Find:
d such that min(f(d,X)) == x_d and ||c - d|| is minimal
The algorithm consists of 2 alternating problems: the master problem (MP), and the sub problem (SP)
Algorithm:
1 S = {}
2 Solve MP to obtain optimal d*
3 Solve SP with d* as c to obtain x_0
4 if objective(x_d) > objective(x_0):
add x_0 to S
go to 2
else:
return d*
Master problem:
Constraints:
- constraints of original forward problem
- objective(d*, x) >= objective(d*, x_0) forall x_0 in S
- foil constraints assigning values to x_i
- foil contraints restricting the altering of c to match the foil constraint indices
Objective:
- Minimize || c - d* ||_1
Sub problem:
The original forward problem
"""
from cpmpy import *
import numpy as np
INFINITY = 10000
verbose = True
np.random.seed(0)
def main():
n = 10 # Number of items in the knapsack
m = 5 # Number of items to change in the knapsack
values, weights, capacity = generate_knapsack_model(n)
x = solve_knapsack_problem(values, weights, capacity)
print_knapsack_model(values, weights, capacity, x)
# We emulate a user query by selecting random items
# These items are assigned the opposite value of the current solution
# While generating this query, we ensure there exist a feasable solution.
x_user, foil_items = generate_foil_knapsack(values, weights, capacity, x, m)
# Pretty print user query
pp_uquery(x, foil_items)
# Find the values vector such that x_foil is optimal
d_star = inverse_optimize(values, weights, capacity, x_user, foil_items)
print(
f"\n\nValues {d_star} results in a solution satisfying the user query being optimal"
)
print(f"Optimal knapsack satisfying user query: {x_user}")
print(f"Value of objective function using d* = {sum(d_star * x_user)}")
def solve_knapsack_problem(values, weights, capacity):
"""
Ordinary 0-1 knapsack problem
Solve for vector x ∈ {T,F}^n
Based on the Numberjack model of Hakan Kjellerstrand
"""
x = boolvar(len(values), name="x")
model = Model([sum(x * weights) <= capacity], maximize=sum(x * values))
if model.solve() is not False:
return x.value()
else:
raise ValueError("Model is UNSAT")
def generate_knapsack_model(n=10):
"""
Generation of a knapsack model
Using same generation parameters as Korikov et al.
"""
R = 1000
values = np.random.randint(1, R, n)
weights = np.random.randint(1, R, n)
capacity = int(max([0.5 * sum(weights), R]))
return values, weights, capacity
def generate_foil_knapsack(values, weights, capacity, x, m, tries=1):
"""
Generate a set of foil constraints
Pick m items and assign the opposite value.
If this results in an unfeasable assignment (surpassing the capacity), try again
@return A model
"""
n = len(x)
foil_idx = np.random.choice(n, m, replace=False)
foil_vals = np.abs(1 - x[foil_idx])
if sum(foil_vals * weights[foil_idx]) > capacity:
if verbose:
print(f"\rGenerated unfeasable user query, retrying...({tries})", end="")
return generate_foil_knapsack(values, weights, capacity, x, m, tries + 1)
else:
return (
extend_to_full_solution(values, weights, capacity, foil_idx, foil_vals),
foil_idx,
)
def extend_to_full_solution(values, weights, capacity, foil_idx, foil_vals):
"""
Extend a given set of foil constraints to a full solution of the knapsack
Formally:
Given v and X, solve the COP (c, v ∩ X)
"""
xv = boolvar(shape=len(values), name="xv")
constraints = [xv[foil_idx] == foil_vals]
constraints += [sum(xv * weights) <= capacity]
model = Model(constraints, maximize=sum(xv * values))
if model.solve() is not False:
return xv.value()
def make_master_problem(values, foil_idx):
"""
Creates the master problem.
Returns both the model itself as well as the variables used in it.
This way the variables can be used to add new constraints outside this building function.
"""
d = intvar(0, INFINITY, values.shape, name="d")
# Minimize the change to the values vector
m = Model(minimize=np.linalg.norm(values - d, ord=1))
# Ensure values are only modified at foil indices
m += [d[i] == values[i] for i in range(len(values)) if i not in foil_idx]
return m, d
def make_sub_problem(values, weights, capacity):
"""
Creates the sub problem
Returns both the model itself as well as the variables in it.
This way the variables can be used to add new constraints outside this building function.
"""
x = boolvar(shape=len(values))
return Model([sum(weights * x) <= capacity]), x
def inverse_optimize(values, weights, capacity, x_d, foil_idx):
"""
Master problem: iteratively find better values for the c vector (new values are vector d)
Mapping of variable names to names in the paper (TODO: link papper)
- c = values
- d = d
@param x_d: A feasable solution found in the foil set X_v
@param foil_idx: A vector containing the items on which the user asked an explanation
All other items must retain their original values
"""
if verbose:
print(f"\n\n{'='*10} Solving the master problem {'='*10}")
sub_model, x_0 = make_sub_problem(values, weights, capacity)
master_model, d = make_master_problem(values, foil_idx)
i = 1
while master_model.solve() is not False:
d_star = d.value()
sub_model.maximize(sum(x_0 * d.value()))
sub_model.solve()
if verbose:
print(f"\nStarting iteration {i}")
print(f"d* = {d_star}")
print(f"d* * x_d = {sum(d_star * x_d)}")
print(f"d* * x_0 = {sum(d_star * x_0.value())}")
if sum(d_star * x_d) >= sum(d_star * x_0.value()):
return d_star
else:
# Convert boolean arrays to integer arrays
x_d_int = x_d.astype(int)
x_0_val_int = x_0.value().astype(int)
# Add constraint using integer coefficients
master_model += [sum(d * x_d_int) >= sum(d * x_0_val_int)]
i += 1
raise ValueError("Master model is UNSAT!")
############################################################################
# All functions below this line are purely used for pretty printing results#
############################################################################
def print_knapsack_model(values, weights, capacity, x):
"""
Pretty prints a knapsack model.
"""
print("Solution to the following knapsack problem")
print("Values =", values)
print("Weights = ", weights)
print(f"Capacity: {capacity}")
print("\nis:", x)
print(f"Resulting in an objective value of {sum(x * values)}")
print(f"Capacity used: {sum(x*weights)}")
def pp_uquery(x, f_items):
"""
Function to pretty print the user query to solver.
Has no computational effects
"""
include = sorted([str(a) for a in f_items[x[f_items] == 0]])
exclude = sorted([str(a) for a in f_items[x[f_items] == 1]])
print(f"\n\n{'='*10} User Query {'='*10}")
print(f"I would like to change the following to the knapsack you provided:")
if len(exclude) > 0:
print(f"Leave out item{'s' if len(exclude) > 1 else ''} {','.join(exclude)}")
if len(include) > 0:
print(f"Put in item{'s' if len(include) > 1 else ''} {','.join(include)}")
print(
"How should the values corresponding to these items change to make this assignment optimal?"
)
if __name__ == "__main__":
main()