-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbase.py
206 lines (170 loc) · 6.3 KB
/
base.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 22 14:23:23 2015
@author: thalita
"""
import abc
from heapq import heappush, heappop
from scipy import sparse
import numpy as np
import pandas as pd
from utils import to_gzpickle, read_gzpickle
class BaseDatabase(object):
    """Interface for a rating store backed by ``self.matrix``.

    The default implementations index the matrix as
    ``matrix[user_id, item_id]``: axis 0 is addressed by user id and
    axis 1 by item id.  A stored value of 0 is reported as "unrated" and
    a value greater than 0 as "rated".
    """

    # NOTE(review): ``__metaclass__`` is the Python 2 spelling.  Python 3
    # ignores this attribute, so the ``abstractmethod`` markers below are
    # presumably not enforced at instantiation time.  Left as-is so that
    # existing subclasses keep instantiating exactly as before.
    __metaclass__ = abc.ABCMeta

    @property
    def matrix(self):
        """Underlying rating matrix (stored in ``self._matrix``)."""
        return self._matrix

    @matrix.setter
    def matrix(self, val):
        self._matrix = val

    @abc.abstractmethod
    def get_matrix(self):
        """Return the data matrix."""
        return self.matrix

    @abc.abstractmethod
    def n_users(self):
        """Return the size of the matrix along the user axis (axis 0)."""
        return self.matrix.shape[0]

    def n_items(self):
        """Return the size of the matrix along the item axis (axis 1).

        ``BaseRecommender.config`` calls ``database.n_items()``, so the
        base class provides a default next to ``n_users``.
        """
        return self.matrix.shape[1]

    @abc.abstractmethod
    def get_rating(self, user_id, item_id):
        """Return the stored rating of ``item_id`` by ``user_id``."""
        return self.matrix[user_id, item_id]

    @abc.abstractmethod
    def get_user_vector(self, user_id):
        """Return the slice ``matrix[user_id, :]`` (all ratings of a user)."""
        return self.matrix[user_id, :]

    @abc.abstractmethod
    def get_item_vector(self, item_id):
        """Return the slice ``matrix[:, item_id]`` (all ratings of an item)."""
        return self.matrix[:, item_id]

    @abc.abstractmethod
    def get_unrated_items(self, user_id):
        """Return the item ids whose rating by ``user_id`` equals 0."""
        return [idx for idx, rating in enumerate(self.matrix[user_id, :])
                if rating == 0]

    @abc.abstractmethod
    def get_rated_items(self, user_id):
        """Return the item ids whose rating by ``user_id`` is above 0."""
        return [idx for idx, rating in enumerate(self.matrix[user_id, :])
                if rating > 0]

    @abc.abstractmethod
    def get_rated_users(self, item_id):
        """Return the user ids that gave ``item_id`` a rating above 0."""
        return [idx for idx, rating in enumerate(self.matrix[:, item_id])
                if rating > 0]
class BaseRecommender(object):
    """Base class for recommenders that work on a database object.

    The database is kept in ``self._database`` and exposed through the
    ``database`` property; it is left out of the state written by
    ``save`` and has to be supplied again to ``load``.
    """

    # NOTE(review): Python 2 style metaclass declaration; Python 3 ignores
    # it, so the abstract markers below are presumably not enforced.
    __metaclass__ = abc.ABCMeta

    @property
    def database(self):
        """Database object."""
        return self._database

    @database.setter
    def database(self, val):
        self._database = val

    @abc.abstractmethod
    def recommend(self, target_user, topN):
        """Return the recommendation list for ``target_user``."""
        return

    def _state_without_database(self):
        """Return a shallow copy of the instance state minus the database."""
        # Copy first: deleting the key from ``self.__dict__`` itself would
        # detach the database from the live object as a side effect.
        state = dict(self.__dict__)
        state.pop('_database', None)
        return state

    @abc.abstractmethod
    def save(self, filepath):
        """Write every instance attribute except the database to ``filepath``.

        The recommender itself is left untouched, so it stays usable
        after saving.
        """
        to_gzpickle(self._state_without_database(), filepath)

    @abc.abstractmethod
    def load(self, filepath, database):
        """Restore attributes saved at ``filepath`` and attach ``database``.

        The object read from ``filepath`` must be a mapping of attribute
        name to value.
        """
        # NOTE(review): the helper name suggests pickle-based loading; if
        # so, only load files from trusted sources -- confirm in ``utils``.
        d = read_gzpickle(filepath)
        for atr, val in d.items():
            setattr(self, atr, val)
        self.database = database

    def is_ensemble(self):
        """Return False: a plain recommender is not an ensemble."""
        return False

    def config(self):
        """Return a dict with the database's ``n_users`` and ``n_items``."""
        return {'n_users': self.database.n_users(),
                'n_items': self.database.n_items()}
class RatingPredictor(BaseRecommender):
    """Recommender that ranks items by a per-(user, item) predicted rating.

    Subclasses provide ``fit`` and ``predict``; ``recommend`` is built on
    top of ``predict``.
    """

    # NOTE(review): Python 2 style metaclass declaration; Python 3 ignores
    # it, so the abstract markers below are presumably not enforced.
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def fit(self):
        """Learn the recommender model (neighborhood, matrix factorization,
        etc) and return ``self``."""
        return self

    @abc.abstractmethod
    def predict(self, target_user, target_item):
        """Return the predicted rating of ``target_item`` for ``target_user``."""
        return

    def recommend(self, target_user, how_many=np.inf, threshold=0,
                  candidate_items=None):
        """Return up to ``how_many`` ``(item, predicted_rating)`` tuples.

        Parameters
        ----------
        target_user : user id handed to ``predict`` and to the database.
        how_many : maximum length of the returned list (default: no limit).
        threshold : only predictions strictly above this value are kept.
        candidate_items : items to score; when None, the items returned by
            ``database.get_unrated_items(target_user)`` are scored.

        Returns
        -------
        list of ``(item, predicted_rating)`` ordered by decreasing
        predicted rating; ties are ordered by increasing item.
        """
        if candidate_items is None:
            candidates = self.database.get_unrated_items(target_user)
        else:
            candidates = candidate_items

        ratings = []
        for item in candidates:
            pred_rating = self.predict(target_user, item)
            if pred_rating > threshold:
                # heapq is a min-heap: store the negated rating so the
                # best prediction is popped first
                heappush(ratings, (-pred_rating, item))

        # emptiness must be tested by truth value; ``ratings is []`` compares
        # identity against a brand new list and can never be True
        if not ratings:
            print('No recomendation could be made for this user')

        # ``how_many`` may be inf or a float; ``range`` needs a plain
        # non-negative int
        length = max(int(min(how_many, len(ratings))), 0)
        rec_list = []
        for _ in range(length):
            # pop tuple (-rating, item) from the heap and store it as
            # (item, rating)
            neg_rating, item = heappop(ratings)
            rec_list.append((item, -neg_rating))
        return rec_list
class SavedRecommendations(RatingPredictor):
    """Recommender that replays recommendation lists stored on disk.

    ``save`` queries another recommender for every user and stores the
    resulting lists; ``load`` reads them back and rebuilds a dense matrix
    of predicted ratings used by ``predict``.
    """

    def __init__(self):
        self.pred_ratings = []
        self.lists = []
        # NOTE: this instance attribute shadows the inherited ``config()``
        # method; after ``load`` it holds the saved configuration dict.
        self.config = None
        self.database = None

    def candidate_items(self, user, n_items, split):
        """Choose the items to score for ``user``.

        Returns None (meaning "no restriction") when ``n_items`` is at
        most 2000.  Otherwise returns the user's hidden items (those found
        in ``split.valid``, ``split.test`` and ``split.tuning``) followed
        by a random sample of the remaining items, aiming at
        ``int(0.01*n_items + 2000)`` candidates in total.  The sample is
        reproducible: the global NumPy RNG is re-seeded from the user id
        and the hidden item ids.
        """
        if n_items <= 2000:
            return None

        # entries of the split are iterated as (item, rating) pairs
        hidden_items = [i for i, r in split.valid[user] + split.test[user]
                        + split.tuning[user]]
        hidden_set = set(hidden_items)  # O(1) membership tests below
        pool = [item for item in range(n_items) if item not in hidden_set]

        n_candidates = int(0.01*n_items + 2000) - len(hidden_items)
        # Sampling without replacement cannot draw a negative number of
        # items nor more items than the pool holds (the latter happens for
        # n_items between 2001 and 2020), so clamp the sample size.
        n_candidates = max(0, min(n_candidates, len(pool)))

        np.random.seed(user + sum(hidden_items))
        sampled = np.random.choice(pool, size=n_candidates, replace=False)
        return hidden_items + sampled.tolist()

    def save(self, filepath, RS, split):
        """Store the recommendation lists of ``RS`` for every user.

        For each user in ``range(RS.database.n_users())`` the recommender
        ``RS`` is asked for recommendations over ``candidate_items`` with
        ``threshold=0``.  The tuple ``(lists, RS.config())`` is then
        written to ``filepath``.
        """
        n_users = RS.database.n_users()
        n_items = RS.database.n_items()
        lists = {}
        for user in range(n_users):
            candidates = self.candidate_items(user, n_items, split)
            # ask for recommendations w/ threshold=0
            # to get all the predratings
            lists[user] = RS.recommend(user, threshold=0,
                                       candidate_items=candidates)
            print('user', user+1, 'of', n_users, end='\r',
                  flush=True)
        # read once, after the loop, so it is defined even with zero users
        config = RS.config()
        print('Saving', filepath)
        to_gzpickle((lists, config), filepath)
        print('Done!')

    def load(self, filepath):
        """Read ``(lists, config)`` from ``filepath`` and rebuild the ratings.

        ``self.pred_ratings`` becomes a zero-filled array of shape
        ``(config['n_users'], config['n_items'])`` in which every saved
        ``(item, rating)`` pair is written at ``[user, item]``.
        """
        self.lists, self.config = read_gzpickle(filepath)
        shape = (self.config['n_users'], self.config['n_items'])
        self.pred_ratings = np.zeros(shape=shape)
        for user in self.lists:
            for item, rating in self.lists[user]:
                self.pred_ratings[user, item] = rating

    def predict(self, target_user, target_item):
        """Return the loaded rating at ``[target_user, target_item]``."""
        return self.pred_ratings[target_user, target_item]

    def recommend(self, target_user, how_many=np.inf, threshold=0,
                  candidate_items=None):
        """Return up to ``how_many`` saved ``(item, rating)`` pairs.

        The saved list of ``target_user`` is scanned in its stored order;
        a pair is kept when its rating is strictly above ``threshold``
        and, if ``candidate_items`` is given, its item is one of them.
        """
        allowed = None if candidate_items is None else set(candidate_items)
        out_list = []
        for item, rating in self.lists[target_user]:
            # checked before appending so that how_many=0 yields an empty
            # list
            if len(out_list) >= how_many:
                break
            if rating > threshold and (allowed is None or item in allowed):
                out_list.append((item, rating))
        return out_list