diff --git a/src/MyMediaLite/ItemRecommendation/ISGD.cs b/src/MyMediaLite/ItemRecommendation/ISGD.cs new file mode 100644 index 00000000..5fb1ca52 --- /dev/null +++ b/src/MyMediaLite/ItemRecommendation/ISGD.cs @@ -0,0 +1,339 @@ +// Copyright (C) 2014 João Vinagre, Zeno Gantner +// +// This file is part of MyMediaLite. +// +// MyMediaLite is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// MyMediaLite is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with MyMediaLite. If not, see . +// +using System; +using C5; +using System.Linq; +using System.Globalization; +using System.Collections.Generic; +using MyMediaLite.DataType; + +namespace MyMediaLite.ItemRecommendation +{ + /// + /// Incremental Stochastic Gradient Descent (ISGD) algorithm for item prediction. + /// + /// + /// + /// Literature: + /// + /// + /// João Vinagre, Alípio Mário Jorge, João Gama: + /// Fast incremental matrix factorization for recommendation with positive-only feedback. + /// UMAP 2014. + /// http://link.springer.com/chapter/10.1007/978-3-319-08786-3_41 + /// + /// + /// + /// + /// Known issues: + /// + /// This algorithm tends to saturate (converges globally to a single value) + /// and slowly degrades with more than a few tens of thousands observations; + /// This algorithm is primarily designed to use with incremental learning, + /// batch behavior has not been thoroughly studied. + /// + /// + /// + /// This algorithm supports (and encourages) incremental updates. 
+	///
+	///
+	public class ISGD : MF
+	{
+		/// <summary>Regularization parameter</summary>
+		public double Regularization { get { return regularization; } set { regularization = value; } }
+		double regularization = 0.032;
+
+		/// <summary>Learn rate (update step size)</summary>
+		public float LearnRate { get { return learn_rate; } set { learn_rate = value; } }
+		float learn_rate = 0.31f;
+
+		/// <summary>Multiplicative learn rate decay</summary>
+		/// <remarks>Applied after each epoch (= pass over the whole dataset)</remarks>
+		public float Decay { get { return decay; } set { decay = value; } }
+		float decay = 1.0f;
+
+		/// <summary>Number of iterations per incremental update; 0 means unset, in which case InitModel() falls back to NumIter</summary>
+		public uint IncrIter { get; set; }
+
+		/// <summary>The learn rate used for the current epoch</summary>
+		protected internal float current_learnrate;
+
+
+		/// <summary>
+		/// Default constructor; enables incremental updates of both user and item factors
+		/// </summary>
+		public ISGD ()
+		{
+			UpdateUsers = true;
+			UpdateItems = true;
+		}
+
+		/// <summary>Initializes the base model, resets the current learn rate and resolves an unset IncrIter</summary>
+		protected override void InitModel()
+		{
+			base.InitModel();
+			current_learnrate = LearnRate;
+			if (IncrIter == 0) IncrIter = NumIter; // keep a user-supplied value; only an unset (0) IncrIter takes the batch value
+		}
+
+		/// <inheritdoc/>
+		public override void Iterate()
+		{
+			Iterate(UpdateUsers, UpdateItems);
+		}
+
+		/// <summary>No objective is computed for this model; always returns -1</summary>
+		public override float ComputeObjective()
+		{
+			return -1;
+		}
+
+		/// <summary>Iterate once over feedback data and adjust corresponding factors (stochastic gradient descent)</summary>
+		/// <param name="update_user">true if user factors to be updated</param>
+		/// <param name="update_item">true if item factors to be updated</param>
+		protected virtual void Iterate(bool update_user, bool update_item)
+		{
+			for (int index = 0; index < Feedback.Count; index++)
+			{
+				int u = Feedback.Users[index];
+				int i = Feedback.Items[index];
+
+				UpdateFactors(u, i, update_user, update_item);
+			}
+
+			UpdateLearnRate();
+
+		}
+
+		/// <inheritdoc/>
+		public override float Predict(int user_id, int item_id)
+		{
+			return Predict(user_id, item_id, false);
+		}
+
+		/// <summary>Predicts via the row scalar product of the user and item factors; returns float.MinValue for IDs beyond the factor matrices and clamps the result to [0, 1] if bound is true</summary>
+		protected virtual float Predict(int user_id, int item_id, bool bound)
+		{
+			if (user_id >= user_factors.dim1 || item_id >= item_factors.dim1)
+				return float.MinValue;
+
+			float result = DataType.MatrixExtensions.RowScalarProduct(user_factors, user_id, item_factors, item_id);
+
+			if (bound)
+			{
+				if (result > 1)
+					return 1;
+				if (result < 0)
+					return 0;
+			}
+			return result;
+		}
+
+		/// <summary>Updates after each epoch: multiplies the current learn rate by Decay</summary>
+		protected virtual void UpdateLearnRate()
+		{
+			current_learnrate *= Decay;
+		}
+
+
+		/// <inheritdoc/>
+		public override void AddFeedback(System.Collections.Generic.ICollection<Tuple<int, int>> feedback)
+		{
+			AddFeedback(feedback,true);
+		}
+
+		/// <summary>Adds feedback to the model and, if retrain is true, immediately retrains on the added entries</summary>
+		public virtual void AddFeedback(System.Collections.Generic.ICollection<Tuple<int, int>> feedback, bool retrain)
+		{
+			base.AddFeedback(feedback);
+			if (retrain) Retrain(feedback);
+		}
+
+		/// <inheritdoc/>
+		public override void RemoveFeedback(System.Collections.Generic.ICollection<Tuple<int, int>> feedback)
+		{
+			base.RemoveFeedback(feedback);
+			Retrain(feedback);
+		}
+
+		/// <summary>Runs IncrIter passes of factor updates over the given feedback entries</summary>
+		protected virtual void Retrain(System.Collections.Generic.ICollection<Tuple<int, int>> feedback)
+		{
+			for (int i = 0; i < IncrIter; i++)
+				foreach (var entry in feedback)
+					UpdateFactors(entry.Item1, entry.Item2, UpdateUsers, UpdateItems);
+		}
+
+		/// <inheritdoc/>
+		protected override void RetrainUser(int user_id)
+		{
+			user_factors.RowInitNormal(user_id, InitMean, InitStdDev);
+			foreach (int item in Feedback.UserMatrix[user_id])
+				for (int i = 0; i < IncrIter; i++)
+					UpdateFactors(user_id, item, true, false);
+		}
+
+		/// <inheritdoc/>
+		protected override void RetrainItem(int item_id)
+		{
+			item_factors.RowInitNormal(item_id, InitMean, InitStdDev);
+			foreach (int user in Feedback.ItemMatrix[item_id])
+				for (int i = 0; i < IncrIter; i++)
+					UpdateFactors(user, item_id, false, true);
+		}
+
+		/// <inheritdoc/>
+		protected override void AddUser(int user_id)
+		{
+			base.AddUser(user_id);
+
+			user_factors.AddRows(user_id + 1);
+			user_factors.RowInitNormal(user_id, InitMean, InitStdDev);
+		}
+
+		/// <inheritdoc/>
+		protected override void AddItem(int item_id)
+		{
+			base.AddItem(item_id);
+
+			item_factors.AddRows(item_id + 1);
+			item_factors.RowInitNormal(item_id, InitMean, InitStdDev);
+		}
+
+
+		/// <inheritdoc/>
+		public override void RemoveUser(int user_id)
+		{
+			base.RemoveUser(user_id);
+
+			// set user latent factors to zero
+			user_factors.SetRowToOneValue(user_id, 0);
+		}
+
+		/// <inheritdoc/>
+		public override void RemoveItem(int item_id)
+		{
+			base.RemoveItem(item_id);
+
+			// set item latent factors to zero
+			item_factors.SetRowToOneValue(item_id, 0);
+		}
+
+		/// <summary>
+		/// Performs factor updates for a user and item pair, using 1 as the target value.
+		/// </summary>
+		/// <param name="user_id">User_id.</param>
+		/// <param name="item_id">Item_id.</param>
+		/// <param name="update_user">true to update user factors.</param>
+		/// <param name="update_item">true to update item factors.</param>
+		protected virtual void UpdateFactors(int user_id, int item_id, bool update_user, bool update_item)
+		{
+			float err = 1 - Predict(user_id, item_id, false);
+
+			// adjust factors
+			for (int f = 0; f < NumFactors; f++)
+			{
+				float u_f = user_factors[user_id, f];
+				float i_f = item_factors[item_id, f];
+
+				// if necessary, compute and apply updates
+				if (update_user)
+				{
+					double delta_u = err * i_f - Regularization * u_f;
+					user_factors.Inc(user_id, f, current_learnrate * delta_u);
+				}
+				if (update_item)
+				{
+					double delta_i = err * u_f - Regularization * i_f;
+					item_factors.Inc(item_id, f, current_learnrate * delta_i);
+				}
+			}
+
+		}
+
+
+		/// <summary>Recommends items ordered by ascending |1 - prediction|; the second tuple component is that error, not the raw prediction</summary>
+		public override System.Collections.Generic.IList<Tuple<int, float>> Recommend(
+			int user_id, int n = -1,
+			System.Collections.Generic.ICollection<int> ignore_items = null,
+			System.Collections.Generic.ICollection<int> candidate_items = null)
+		{
+			if (candidate_items == null)
+				candidate_items = Enumerable.Range(0, MaxItemID + 1).ToList(); // Range takes a count: covers IDs 0..MaxItemID
+			if (ignore_items == null)
+				ignore_items = new int[0];
+
+			System.Collections.Generic.IList<Tuple<int, float>> ordered_items;
+
+			if (n == -1)
+			{
+				var scored_items = new List<Tuple<int, float>>();
+				foreach (int item_id in candidate_items)
+					if (!ignore_items.Contains(item_id))
+					{
+						float error = Math.Abs(1 - Predict(user_id, item_id));
+						if (float.IsNaN(error) || error > float.MaxValue) // NaN would otherwise sort before every real error
+							error = float.MaxValue;
+						scored_items.Add(Tuple.Create(item_id, error));
+					}
+
+				ordered_items = scored_items.OrderBy(x => x.Item2).ToArray();
+			}
+			else
+			{
+				var comparer = new DelegateComparer<Tuple<int, float>>( (a, b) => a.Item2.CompareTo(b.Item2) );
+				var heap = new IntervalHeap<Tuple<int, float>>(n, comparer);
+				float max_error = float.MaxValue;
+
+				foreach (int item_id in candidate_items)
+					if (!ignore_items.Contains(item_id))
+					{
+						float error = Math.Abs(1 - Predict(user_id, item_id));
+						if (error < max_error)
+						{
+							heap.Add(Tuple.Create(item_id, error));
+							if (heap.Count > n)
+							{
+								heap.DeleteMax();
+								max_error = heap.Count == 0 ? float.MinValue : heap.FindMax().Item2; // n == 0 empties the heap; FindMax() would throw
+							}
+						}
+					}
+
+				ordered_items = new Tuple<int, float>[heap.Count];
+				for (int i = 0; i < ordered_items.Count; i++)
+					ordered_items[i] = heap.DeleteMin();
+			}
+
+			return ordered_items;
+		}
+
+
+		/// <inheritdoc/>
+		public override string ToString()
+		{
+			return string.Format(
+				CultureInfo.InvariantCulture,
+				"ISGD num_factors={0} regularization={1} learn_rate={2} num_iter={3} incr_iter={4} decay={5}",
+				NumFactors, Regularization, LearnRate, NumIter, IncrIter, Decay);
+		}
+
+
+	}
+}
+
diff --git a/src/MyMediaLite/ItemRecommendation/RAISGD.cs b/src/MyMediaLite/ItemRecommendation/RAISGD.cs
new file mode 100644
index 00000000..662b9bda
--- /dev/null
+++ b/src/MyMediaLite/ItemRecommendation/RAISGD.cs
@@ -0,0 +1,240 @@
+// Copyright (C) 2013 Zeno Gantner
+//
+// This file is part of MyMediaLite.
+//
+// MyMediaLite is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// MyMediaLite is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with MyMediaLite. If not, see <http://www.gnu.org/licenses/>.
+//
+using System;
+using C5;
+using System.Linq;
+using System.Globalization;
+using MyMediaLite.DataType;
+
+namespace MyMediaLite.ItemRecommendation
+{
+	///
+	/// Recency-Adjusted Incremental Stochastic Gradient Descent (RAISGD) algorithm for item prediction.
+	///
+	/// <remarks>
+	///   <para>
+	///     Literature:
+	///     <list type="bullet">
+	///       <item><description>
+	///         João Vinagre, Alípio Mário Jorge, João Gama:
+	///         Collaborative filtering with recency-based negative feedback.
+	///         ACM SAC 2015.
+	///         http://dl.acm.org/citation.cfm?id=2695998
+	///       </description></item>
+	///     </list>
+	///   </para>
+	///   <para>
+	///     This algorithm extends ISGD to accept recency-based negative feedback in incremental updates.
+	///   </para>
+	/// </remarks>
+	public class RAISGD : ISGD
+	{
+		/// <summary>Apply user-based negative feedback in user factors</summary>
+		public bool NegUsersInUsers { get; set; }
+		/// <summary>Apply item-based negative feedback in user factors</summary>
+		public bool NegItemsInUsers { get; set; }
+		/// <summary>Apply user-based negative feedback in item factors</summary>
+		public bool NegUsersInItems { get; set; }
+		/// <summary>Apply item-based negative feedback in item factors</summary>
+		public bool NegItemsInItems { get; set; }
+
+		/// <summary>Number of negative examples for each positive example.</summary>
+		public int NegFeedbackAmount { get { return neg_feedback_amount; } set { neg_feedback_amount = value; } }
+		int neg_feedback_amount = 1;
+
+		bool negate_users;
+		bool negate_items;
+
+		/// <summary>Item queue for item-based negative feedback (contains items ordered by time of latest occurrence)</summary>
+		protected HashedLinkedList<int> item_queue;
+		/// <summary>User queue for user-based negative feedback (contains users ordered by time of latest occurrence)</summary>
+		protected HashedLinkedList<int> user_queue;
+
+		/// <summary>Default constructor; only item-based negative feedback in user factors is enabled.</summary>
+		public RAISGD()
+		{
+			NegUsersInUsers = false;
+			NegUsersInItems = false;
+			NegItemsInUsers = true;
+			NegItemsInItems = false;
+		}
+
+		/// <summary>
+		/// Initiates the base model and the user and/or item queue(s)
+		/// </summary>
+		protected override void InitModel()
+		{
+			base.InitModel();
+
+			negate_users = NegUsersInUsers || NegUsersInItems;
+			negate_items = NegItemsInUsers || NegItemsInItems;
+
+			if (negate_users)
+			{
+				user_queue = new HashedLinkedList<int>();
+				user_queue.AddAll(Feedback.Users); // NOTE(review): repeated IDs presumably keep their first position, not their latest - confirm against C5 AddAll
+			}
+			if (negate_items)
+			{
+				item_queue = new HashedLinkedList<int>();
+				item_queue.AddAll(Feedback.Items); // NOTE(review): same first-vs-latest occurrence question as for user_queue
+			}
+		}
+
+		/// <summary>Retrains on each given entry in turn, imputing negative feedback before every positive update</summary>
+		protected override void Retrain(System.Collections.Generic.ICollection<Tuple<int, int>> feedback)
+		{
+			foreach (var entry in feedback)
+			{
+				RetrainEntry(entry);
+			}
+		}
+
+		/// <summary>
+		/// Retrains a single user-item pair.
+		/// </summary>
+		/// <param name="entry">The user-item pair</param>
+		protected virtual void RetrainEntry(Tuple<int, int> entry)
+		{
+			InsertNegFeedback(entry);
+			for (uint i = 0; i < IncrIter; i++)
+				UpdateFactors(entry.Item1, entry.Item2, UpdateUsers, UpdateItems, 1);
+		}
+
+		/// <summary>
+		/// Imputes negative feedback for an entry: takes up to NegFeedbackAmount users/items from the queue fronts, trains them against the entry with target 0, then re-queues them behind the entry's own IDs.
+		/// </summary>
+		/// <param name="entry">The user-item pair</param>
+		protected virtual void InsertNegFeedback(Tuple<int, int> entry)
+		{
+			int[] qu = Enumerable.Repeat(-1, neg_feedback_amount).ToArray();
+			int[] qi = Enumerable.Repeat(-1, neg_feedback_amount).ToArray();
+			if (negate_users)
+			{
+				for (uint i = 0; i < qu.Length && i < user_queue.Count - 1; )
+				{
+					qu[i] = user_queue.RemoveFirst();
+					if (qu[i] != entry.Item1) i++;
+				}
+			}
+			if (negate_items)
+			{
+				for (uint i = 0; i < qi.Length && i < item_queue.Count - 1; )
+				{
+					qi[i] = item_queue.RemoveFirst();
+					if (qi[i] != entry.Item2) i++;
+				}
+			}
+			// train the dequeued users/items as negative examples (target value 0)
+			if (negate_users)
+				foreach (var usr in qu.Reverse())
+					if (usr >= 0 && usr != entry.Item1) // the dequeue loop can end with the entry's own user in the last slot; never push the positive pair towards 0
+						for (uint i = 0; i < IncrIter; i++)
+							UpdateFactors(usr, entry.Item2, NegUsersInUsers, NegUsersInItems, 0);
+			if (negate_items)
+				foreach (var itm in qi.Reverse())
+					if (itm >= 0 && itm != entry.Item2) // same guard for the entry's own item
+						for (uint i = 0; i < IncrIter; i++)
+							UpdateFactors(entry.Item1, itm, NegItemsInUsers, NegItemsInItems, 0);
+
+			if (negate_items)
+			{
+				item_queue.Remove(entry.Item2);
+				item_queue.InsertLast(entry.Item2);
+
+				foreach (var itm in qi.Reverse())
+					if (itm >= 0 && itm != entry.Item2)
+						item_queue.InsertLast(itm);
+			}
+			if (negate_users)
+			{
+				user_queue.Remove(entry.Item1);
+				user_queue.InsertLast(entry.Item1);
+
+				foreach (var usr in qu.Reverse())
+					if (usr >= 0 && usr != entry.Item1)
+						user_queue.InsertLast(usr);
+			}
+		}
+
+		/// <inheritdoc/>
+		public override void RemoveItem(int item_id)
+		{
+			base.RemoveItem(item_id);
+
+			if (negate_items)
+				item_queue.Remove(item_id);
+		}
+
+		/// <inheritdoc/>
+		public override void RemoveUser(int user_id)
+		{
+			base.RemoveUser(user_id);
+
+			if (negate_users)
+				user_queue.Remove(user_id);
+		}
+
+		/// <summary>
+		/// Performs factor updates for a user and item pair.
+		/// </summary>
+		/// <param name="user_id">The user ID</param>
+		/// <param name="item_id">The item ID</param>
+		/// <param name="update_user">Update user factors</param>
+		/// <param name="update_item">Update item factors</param>
+		/// <param name="rating_val">The rating (1 for positive feedback, 0 for negative feedback)</param>
+		protected virtual void UpdateFactors(int user_id, int item_id, bool update_user, bool update_item, float rating_val = 1)
+		{
+			// prediction error relative to the requested target value
+			float err = rating_val - Predict(user_id, item_id, false);
+
+			// adjust factors
+			for (int f = 0; f < NumFactors; f++)
+			{
+				float u_f = user_factors[user_id, f];
+				float i_f = item_factors[item_id, f];
+
+				// if necessary, compute and apply updates
+				if (update_user)
+				{
+					double delta_u = err * i_f - Regularization * u_f;
+					user_factors.Inc(user_id, f, current_learnrate * delta_u);
+				}
+				if (update_item)
+				{
+					double delta_i = err * u_f - Regularization * i_f;
+					item_factors.Inc(item_id, f, current_learnrate * delta_i);
+				}
+			}
+
+		}
+
+
+		/// <inheritdoc/>
+		public override string ToString()
+		{
+			return string.Format(
+				CultureInfo.InvariantCulture,
+				"RAISGD num_factors={0} regularization={1} learn_rate={2} num_iter={3} incr_iter={4} decay={5}, neg_feedback_amount={6}",
+				NumFactors, Regularization, LearnRate, NumIter, IncrIter, Decay, NegFeedbackAmount);
+		}
+
+
+	}
+}
+
diff --git a/src/MyMediaLite/MyMediaLite.csproj b/src/MyMediaLite/MyMediaLite.csproj
index 3e46e0e0..9dd46383 100644
--- a/src/MyMediaLite/MyMediaLite.csproj
+++ b/src/MyMediaLite/MyMediaLite.csproj
@@ -3,42 +3,38 @@ Debug AnyCPU - 10.0.0 + 8.0.30703 2.0 {73424F09-BEEA-4992-B116-5F123496F5D3} Library MyMediaLite MyMediaLite - 3.10 + 3.06 true full true - bin/Debug + bin\Debug DEBUG prompt 4 false - true + bin\Debug\MyMediaLite.xml none true - bin/Release + bin\Release prompt 4 false - bin/Release/MyMediaLite.xml + bin\Release\MyMediaLite.xml ..\packages\C5.2.3\lib\net40\C5.dll - - False - ..\packages\MathNet.Numerics.2.6.1\lib\net40\MathNet.Numerics.dll - ..\packages\MathNet.Numerics.2.6.1\lib\net40\MathNet.Numerics.IO.dll @@ -47,6 +43,9 @@ + +
..\packages\MathNet.Numerics.2.6.1\lib\net40\MathNet.Numerics.dll + @@ -243,6 +242,8 @@ + +