-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathoptimizations.py
409 lines (349 loc) · 17.3 KB
/
optimizations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
from myutils import get_rec_pid, get_path_type
from metrics import *
def sort_by_LIR(path_full):
    # Sort-key helper: score a full prediction entry by the LIR of its path
    # (element at index 2 of the entry).
    # NOTE(review): every other call site in this module invokes
    # LIR_single(path_data, path) with TWO arguments; here it is called with
    # one. Confirm LIR_single's signature in metrics.py — this helper may be
    # dead code or out of date.
    return LIR_single(path_full[2])
def sort_by_SEP(path_full):
    # Sort-key helper: score a full prediction entry by the SEP of its path
    # (element at index 2 of the entry).
    # NOTE(review): other call sites invoke SEP_single(path_data, path) with
    # TWO arguments; here it is called with one. Confirm SEP_single's
    # signature in metrics.py — this helper may be dead code or out of date.
    return SEP_single(path_full[2])
# Soft LIR optimization: the baseline's top-k item lists are left untouched;
# only the explanation shown for each recommended item is swapped for the
# predicted path with the highest LIR score.
def soft_optimization_LIR(path_data):
    """For every (user, recommended item) pick the most LIR-relevant path.

    Sorts each item's predicted-path list in place (best LIR first) and
    records the winning path in path_data.uid_pid_explaination.
    """
    all_paths = path_data.pred_paths
    for uid, topk in path_data.uid_topk.items():
        for pid in topk:
            # In-place sort: highest LIR first (mutates pred_paths, as before).
            all_paths[uid][pid].sort(
                key=lambda entry: LIR_single(path_data, entry[-1]),
                reverse=True,
            )
            best_entry = all_paths[uid][pid][0]
            path_data.uid_pid_explaination[uid][pid] = best_entry[-1]
# Soft SEP optimization: the baseline's top-k item lists are left untouched;
# only the explanation shown for each recommended item is swapped for the
# predicted path with the highest SEP score.
def soft_optimization_SEP(path_data):
    """For every (user, recommended item) pick the most SEP-relevant path.

    Sorts each item's predicted-path list in place (best SEP first) and
    records the winning path in path_data.uid_pid_explaination.
    """
    all_paths = path_data.pred_paths
    for uid, topk in path_data.uid_topk.items():
        for pid in topk:
            # In-place sort: highest SEP first (mutates pred_paths, as before).
            all_paths[uid][pid].sort(
                key=lambda entry: SEP_single(path_data, entry[-1]),
                reverse=True,
            )
            best_entry = all_paths[uid][pid][0]
            path_data.uid_pid_explaination[uid][pid] = best_entry[-1]
# Soft ETD optimization: keep the baseline top-k, but diversify the shown
# explanations — walking the top-k in order, prefer for each item a path
# whose type has not been used for this user yet.
def soft_optimization_ETD(path_data):
    """Assign each recommended item a path of a not-yet-seen type when possible.

    Each item's path list is sorted in place by its second field ascending
    (probability, as in the original), then the first path introducing a new
    path type is chosen; if every type was already used, the first path is
    used as a fallback.
    """
    all_paths = path_data.pred_paths
    for uid, topk in path_data.uid_topk.items():
        path_data.uid_pid_explaination[uid] = {}
        used_types = set()
        for pid in topk:
            item_paths = all_paths[uid][pid]
            # In-place ascending sort on the probability field (kept as-is).
            item_paths.sort(key=lambda entry: entry[1])
            chosen = next(
                (entry[-1] for entry in item_paths
                 if get_path_type(entry[-1]) not in used_types),
                None,
            )
            if chosen is None:
                # Every path type was already used: fall back to the first path.
                path_data.uid_pid_explaination[uid][pid] = item_paths[0][-1]
            else:
                used_types.add(get_path_type(chosen))
                path_data.uid_pid_explaination[uid][pid] = chosen
# Alpha-blended LIR re-ranking: trade off item relevance (path score) against
# explanation time relevance (LIR), then rebuild the user's top 10.
def optimize_LIR(path_data, alpha):
    """Rerank each user's items by (1-alpha)*path_score + alpha*LIR.

    pred_paths layout: {uid: {pid: [[path_score, path_prob, path], ...], ...}, ...}
    Writes path_data.uid_topk[uid] and path_data.uid_pid_explaination[uid].
    """
    pred_paths = path_data.pred_paths
    for uid, item_paths in pred_paths.items():
        # Flatten every candidate path of every item into one pool.
        pool = []
        for path_list in item_paths.values():
            pool.extend(path_list)
        # Rank the pool by the blended score, best first.
        pool.sort(
            key=lambda cand: cand[0] * (1 - alpha) + LIR_single(path_data, cand[-1]) * alpha,
            reverse=True,
        )
        # Greedily keep the best-ranked path per distinct item, up to 10 items.
        chosen = []
        chosen_pids = set()
        for cand in pool:
            rec_pid = get_rec_pid(cand)
            if rec_pid in chosen_pids:
                continue
            chosen.append(cand)
            chosen_pids.add(rec_pid)
            if len(chosen) == 10:
                break
        # Present the final top-k ordered by the raw path score.
        chosen.sort(key=lambda cand: cand[0], reverse=True)
        path_data.uid_topk[uid] = [get_rec_pid(cand) for cand in chosen]
        path_data.uid_pid_explaination[uid] = {get_rec_pid(cand): cand[-1] for cand in chosen}
# Alpha-blended SEP re-ranking: trade off item relevance (path score) against
# explanation popularity relevance (SEP), then rebuild the user's top 10.
def optimize_SEP(path_data, alpha):
    """Rerank each user's items by (1-alpha)*path_score + alpha*SEP.

    Also re-sorts each item's path list in place by its probability field,
    descending (side effect kept from the original implementation).
    """
    pred_paths = path_data.pred_paths
    for uid, item_paths in pred_paths.items():
        pool = []
        for path_list in item_paths.values():
            # In-place sort by probability, most probable first (kept as-is).
            path_list.sort(key=lambda entry: entry[1], reverse=True)
            pool.extend(path_list)
        # Rank the pooled candidates by the blended score, best first.
        pool.sort(
            key=lambda cand: (cand[0] * (1 - alpha)) + (SEP_single(path_data, cand[-1]) * alpha),
            reverse=True,
        )
        # Greedily keep the best-ranked path per distinct item, up to 10 items.
        chosen = []
        chosen_pids = set()
        for cand in pool:
            rec_pid = get_rec_pid(cand)
            if rec_pid in chosen_pids:
                continue
            chosen.append(cand)
            chosen_pids.add(rec_pid)
            if len(chosen) == 10:
                break
        # Present the final top-k ordered by the raw path score.
        chosen.sort(key=lambda cand: cand[0], reverse=True)
        path_data.uid_topk[uid] = [get_rec_pid(cand) for cand in chosen]
        path_data.uid_pid_explaination[uid] = {get_rec_pid(cand): cand[-1] for cand in chosen}
#ETD Alpha optimization
def optimize_ETD(path_data, alpha):
    # Rerank each user's top 10 to diversify explanation path types (ETD).
    # Paths are grouped into per-type bins sorted by path score; while filling
    # the top 10, the head of each bin competes on path_score plus an `alpha`
    # bonus if its path type has not been shown to this user yet.
    pred_path = path_data.pred_paths
    for uid, pid_list in pred_path.items():
        candidates = []
        best_candidates = []
        best_candidates_pids = set()
        # Flatten all predicted paths of all candidate items into one pool.
        for pid, path_list in pid_list.items():
            path_list = pred_path[uid][pid]  # same list as the loop variable (redundant re-lookup)
            candidates.extend(path_list)
        # Create a bin for every path type and insert every path of that type.
        bins = {}
        for candidate in candidates:
            candidate_path_type = get_path_type(candidate[-1])
            if get_path_type(candidate[-1]) not in bins:
                bins[candidate_path_type] = []
            bins[candidate_path_type].append(candidate)
        # Sort every bin by path score, best first.
        for bin_type, path_list in bins.items():
            path_list.sort(key=lambda x: x[0],
                           reverse=True)
        # Repeatedly pick the best bin head; a candidate whose path type is not
        # yet in ptype_seen gets an `alpha` bonus to encourage type diversity.
        ptype_seen = set()
        while len(best_candidates) < 10:
            best_type = ""   # sentinel: "" means no pickable candidate found
            best_score = -1  # NOTE(review): assumes blended scores exceed -1 — confirm score range
            for bin_type, path_list in bins.items():
                if len(path_list) == 0: continue
                candidate = path_list[0]
                rec_pid = get_rec_pid(path_list[0][-1])
                # Drop bin heads whose item is already in the selection.
                while rec_pid in best_candidates_pids:
                    path_list.pop(0)
                    if len(path_list) == 0: break
                    candidate = path_list[0]
                    rec_pid = get_rec_pid(candidate[-1])
                if len(path_list) == 0: continue
                # Diversity bonus for a path type not shown yet.
                bonus = alpha if get_path_type(candidate[-1]) not in ptype_seen else 0
                score = candidate[0] + bonus
                if score > best_score:
                    best_score = score
                    best_type = get_path_type(candidate[-1])
            if best_type == "":
                # Fewer than 10 distinct items were available; stop early.
                # print("Less than 10: {}".format(len(best_candidates)))
                break
            best_candidates.append(bins[best_type][0])
            best_candidates_pids.add(get_rec_pid(bins[best_type][0][-1]))
            ptype_seen.add(best_type)
            bins[best_type].pop(0)
            if len(bins[best_type]) == 0:
                bins.pop(best_type)
        # Present the final top-k ordered by the raw path score.
        best_candidates.sort(key=lambda x: x[0],
                             reverse=True)
        # Update the topk and explanations with the reranked selection.
        path_data.uid_topk[uid] = [get_rec_pid(candidate) for candidate in best_candidates]
        path_data.uid_pid_explaination[uid] = {get_rec_pid(candidate): candidate[-1] for candidate in best_candidates}
#LIR+SEP Optimization
def optimize_LIR_SEP(path_data, alpha):
    # Rerank each user's items by a blend of the (min-max normalized) path
    # score and the sum of the LIR and SEP metrics of the path:
    #   (1-alpha) * norm_score + alpha * (LIR + SEP)
    # NOTE(review): normalization overwrites candidate[0] in place, so the raw
    # path scores inside path_data.pred_paths are clobbered — kept as-is since
    # callers may rely on it; confirm this is intentional.
    pred_path = path_data.pred_paths
    for uid, pid_list in pred_path.items():
        candidates = []
        best_candidates = []
        best_candidates_pids = set()
        # Create candidate list (all paths of all items pooled together).
        for pid, path_list in pid_list.items():
            candidates.extend(path_list)
        if not candidates:
            # BUGFIX: a user with no predicted paths previously crashed on
            # min()/max() of an empty sequence; skip such users instead.
            continue
        # Normalize scores between 0 and 1 so they are comparable with the
        # metric scores.
        scores_list = [candidate[0] for candidate in candidates]
        min_score = min(scores_list)
        max_score = max(scores_list)
        score_range = max_score - min_score
        for candidate in candidates:
            # BUGFIX: guard against a zero range (all scores equal, or a single
            # candidate), which previously raised ZeroDivisionError.
            candidate[0] = ((candidate[0] - min_score) / score_range) if score_range else 0.0
        candidates.sort(key=lambda x: (x[0] * (1-alpha)) + ((LIR_single(path_data, x[-1]) + SEP_single(path_data, x[-1])) * alpha),
                        reverse=True)
        # Pick the best-ranked path per distinct item, up to 10 items.
        for candidate in candidates:
            rec_pid = get_rec_pid(candidate)
            if rec_pid in best_candidates_pids: continue
            best_candidates.append(candidate)
            best_candidates_pids.add(rec_pid)
            if len(best_candidates) == 10: break
        # Present the final top-k ordered by the (normalized) path score.
        best_candidates.sort(key=lambda candidate: candidate[0], reverse=True)
        # Update the topk and explanations with the reranked selection.
        path_data.uid_topk[uid] = [get_rec_pid(candidate) for candidate in best_candidates]
        path_data.uid_pid_explaination[uid] = {get_rec_pid(candidate): candidate[-1] for candidate in best_candidates}
def optimize_ETD_LIR(path_data, alpha):
    # ETD+LIR reranking: like optimize_ETD, but each per-type bin is sorted by
    # the blended score (1-alpha)*path_score + alpha*LIR instead of the raw
    # path score; the alpha bonus for unseen path types is applied as in ETD.
    pred_path = path_data.pred_paths
    for uid, pid_list in pred_path.items():
        candidates = []
        best_candidates = []
        best_candidates_pids = set()
        # Flatten all predicted paths of all candidate items into one pool.
        for pid, path_list in pid_list.items():
            path_list = pred_path[uid][pid]  # same list as the loop variable (redundant re-lookup)
            candidates.extend(path_list)
        # Create a bin for every path type and insert every path of that type.
        bins = {}
        for candidate in candidates:
            candidate_path_type = get_path_type(candidate[-1])
            if get_path_type(candidate[-1]) not in bins:
                bins[candidate_path_type] = []
            bins[candidate_path_type].append(candidate)
        # Sort every bin by a blend of item relevance and LIR, best first.
        for bin_type, path_list in bins.items():
            path_list.sort(key=lambda x: (x[0] * (1-alpha)) + (LIR_single(path_data, x[-1]) * alpha),
                           reverse=True)
        # Repeatedly pick the best bin head; unseen path types get an alpha bonus.
        ptype_seen = set()
        while len(best_candidates) < 10:
            best_type = ""   # sentinel: "" means no pickable candidate found
            best_score = -1  # NOTE(review): assumes blended scores exceed -1 — confirm score range
            for bin_type, path_list in bins.items():
                if len(path_list) == 0: continue
                candidate = path_list[0]
                rec_pid = get_rec_pid(path_list[0][-1])
                # Drop bin heads whose item is already in the selection.
                while rec_pid in best_candidates_pids:
                    path_list.pop(0)
                    if len(path_list) == 0: break
                    candidate = path_list[0]
                    rec_pid = get_rec_pid(candidate[-1])
                if len(path_list) == 0: continue
                # Diversity bonus for a path type not shown yet.
                bonus = alpha if get_path_type(candidate[-1]) not in ptype_seen else 0
                score = candidate[0] + bonus
                if score > best_score:
                    best_score = score
                    best_type = get_path_type(candidate[-1])
            if best_type == "":
                # Fewer than 10 distinct items were available; stop early.
                # print("Less than 10: {}".format(len(best_candidates)))
                break
            best_candidates.append(bins[best_type][0])
            best_candidates_pids.add(get_rec_pid(bins[best_type][0][-1]))
            ptype_seen.add(best_type)
            bins[best_type].pop(0)
            if len(bins[best_type]) == 0:
                bins.pop(best_type)
        # Present the final top-k ordered by the raw path score.
        best_candidates.sort(key=lambda candidate: candidate[0], reverse=True)
        # Update the topk and explanations with the reranked selection.
        path_data.uid_topk[uid] = [get_rec_pid(candidate) for candidate in best_candidates]
        path_data.uid_pid_explaination[uid] = {get_rec_pid(candidate): candidate[-1] for candidate in best_candidates}
#ETD+SEP Alpha optimization
def optimize_ETD_SEP(path_data, alpha):
    # ETD+SEP reranking: like optimize_ETD, but each per-type bin is sorted by
    # the blended score (1-alpha)*path_score + alpha*SEP instead of the raw
    # path score; the alpha bonus for unseen path types is applied as in ETD.
    pred_path = path_data.pred_paths
    for uid, pid_list in pred_path.items():
        candidates = []
        best_candidates = []
        best_candidates_pids = set()
        # Flatten all predicted paths of all candidate items into one pool.
        for pid, path_list in pid_list.items():
            path_list = pred_path[uid][pid]  # same list as the loop variable (redundant re-lookup)
            candidates.extend(path_list)
        # Create a bin for every path type and insert every path of that type.
        bins = {}
        for candidate in candidates:
            candidate_path_type = get_path_type(candidate[-1])
            if get_path_type(candidate[-1]) not in bins:
                bins[candidate_path_type] = []
            bins[candidate_path_type].append(candidate)
        # Sort every bin by a blend of item relevance and SEP, best first.
        for bin_type, path_list in bins.items():
            path_list.sort(key=lambda x: (x[0] * (1-alpha)) + (SEP_single(path_data, x[-1]) * alpha),
                           reverse=True)
        # Repeatedly pick the best bin head; unseen path types get an alpha bonus.
        ptype_seen = set()
        while len(best_candidates) < 10:
            best_type = ""   # sentinel: "" means no pickable candidate found
            best_score = -1  # NOTE(review): assumes blended scores exceed -1 — confirm score range
            for bin_type, path_list in bins.items():
                if len(path_list) == 0: continue
                candidate = path_list[0]
                rec_pid = get_rec_pid(path_list[0][-1])
                # Drop bin heads whose item is already in the selection.
                while rec_pid in best_candidates_pids:
                    path_list.pop(0)
                    if len(path_list) == 0: break
                    candidate = path_list[0]
                    rec_pid = get_rec_pid(candidate[-1])
                if len(path_list) == 0: continue
                # Diversity bonus for a path type not shown yet.
                bonus = alpha if get_path_type(candidate[-1]) not in ptype_seen else 0
                score = candidate[0] + bonus
                if score > best_score:
                    best_score = score
                    best_type = get_path_type(candidate[-1])
            if best_type == "":
                # Fewer than 10 distinct items were available; stop early.
                # print("Less than 10: {}".format(len(best_candidates)))
                break
            best_candidates.append(bins[best_type][0])
            best_candidates_pids.add(get_rec_pid(bins[best_type][0][-1]))
            ptype_seen.add(best_type)
            bins[best_type].pop(0)
            if len(bins[best_type]) == 0:
                bins.pop(best_type)
        # Present the final top-k ordered by the raw path score.
        best_candidates.sort(key=lambda x: x[0],
                             reverse=True)
        # Update the topk and explanations with the reranked selection.
        path_data.uid_topk[uid] = [get_rec_pid(candidate) for candidate in best_candidates]
        path_data.uid_pid_explaination[uid] = {get_rec_pid(candidate): candidate[-1] for candidate in best_candidates}
#ETD+SEP+LIR Alpha optimization
def optimize_ETD_SEP_LIR(path_data, alpha):
    # Combined reranking: like optimize_ETD, but each per-type bin is sorted by
    # (1-alpha)*path_score + alpha*(LIR + SEP); the alpha bonus for unseen
    # path types is applied as in ETD.
    pred_path = path_data.pred_paths
    for uid, pid_list in pred_path.items():
        candidates = []
        best_candidates = []
        best_candidates_pids = set()
        # Flatten all predicted paths of all candidate items into one pool.
        for pid, path_list in pid_list.items():
            path_list = pred_path[uid][pid]  # same list as the loop variable (redundant re-lookup)
            candidates.extend(path_list)
        # Create a bin for every path type and insert every path of that type.
        bins = {}
        for candidate in candidates:
            candidate_path_type = get_path_type(candidate[-1])
            if get_path_type(candidate[-1]) not in bins:
                bins[candidate_path_type] = []
            bins[candidate_path_type].append(candidate)
        # Sort every bin by a blend of item relevance and (LIR + SEP), best first.
        for bin_type, path_list in bins.items():
            path_list.sort(
                key=lambda x: (x[0] * (1 - alpha)) + ((LIR_single(path_data, x[-1]) + SEP_single(path_data, x[-1])) * alpha),
                reverse=True)
        # Repeatedly pick the best bin head; unseen path types get an alpha bonus.
        ptype_seen = set()
        while len(best_candidates) < 10:
            best_type = ""   # sentinel: "" means no pickable candidate found
            best_score = -1  # NOTE(review): assumes blended scores exceed -1 — confirm score range
            for bin_type, path_list in bins.items():
                if len(path_list) == 0: continue
                candidate = path_list[0]
                rec_pid = get_rec_pid(path_list[0][-1])
                # Drop bin heads whose item is already in the selection.
                while rec_pid in best_candidates_pids:
                    path_list.pop(0)
                    if len(path_list) == 0: break
                    candidate = path_list[0]
                    rec_pid = get_rec_pid(candidate[-1])
                if len(path_list) == 0: continue
                # Diversity bonus for a path type not shown yet.
                bonus = alpha if get_path_type(candidate[-1]) not in ptype_seen else 0
                score = candidate[0] + bonus
                if score > best_score:
                    best_score = score
                    best_type = get_path_type(candidate[-1])
            if best_type == "":
                # Fewer than 10 distinct items were available; stop early.
                # print("Less than 10: {}".format(len(best_candidates)))
                break
            best_candidates.append(bins[best_type][0])
            best_candidates_pids.add(get_rec_pid(bins[best_type][0][-1]))
            ptype_seen.add(best_type)
            bins[best_type].pop(0)
            if len(bins[best_type]) == 0:
                bins.pop(best_type)
        # Present the final top-k ordered by the raw path score.
        best_candidates.sort(key=lambda x: x[0],
                             reverse=True)
        # Update the topk and explanations with the reranked selection.
        path_data.uid_topk[uid] = [get_rec_pid(candidate) for candidate in best_candidates]
        path_data.uid_pid_explaination[uid] = {get_rec_pid(candidate): candidate[-1] for candidate in best_candidates}