Skip to content

Commit 6df7204

Browse files
authored
Merge pull request #13 from auxten/movielens-din
Test din model on movielens dataset
2 parents b99164d + b0ceea8 commit 6df7204

23 files changed

Lines changed: 1467 additions & 359 deletions

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
*.dll
55
*.so
66
*.dylib
7+
*.run
8+
*.rpm
79

810
*.zip
911
model.txt

README.md

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,29 @@
44

55
A deep-learning (Item2vec embedding + MLP) based recommendation system that combines feature engineering, training, and prediction in one package and can run on a small server or an edge device.
66

7+
# Models implemented
8+
9+
- [x] [Simple 2 layer MLP](./nn/neural_network/multilayer_perceptron.go)
10+
- [x] [Simple 2 layer MLP test on MovieLens](./example/movielens/feature_test.go)
11+
- [x] Dropout and L2 regularization
12+
- [x] Batch Normalization
13+
- [x] [YouTube DNN](./model/din/simplemlp.go)
14+
- [x] [YouTube DNN test on MovieLens](./example/movielens/mlpimpl_test.go)
15+
- [x] Dropout and L2 regularization
16+
- [ ] Batch Normalization
17+
- [x] [DeepInterestNetwork](./model/din/din.go)
18+
- [x] [DIN test on MovieLens](./example/movielens/dinimpl_test.go)
19+
- [x] Euclidean Distance based attention
20+
- [x] Dropout and L2 regularization
21+
- [ ] Batch Normalization
22+
723
# Demo
824

925
You can run the MovieLens training and prediction demo with:
1026

1127
```shell
1228
# download and unzip the SQLite DB file
13-
wget https://github.com/auxten/edgeRec/files/9482338/movielens.db.zip && \
29+
wget https://github.com/auxten/edgeRec/files/9895974/movielens.db.zip && \
1430
unzip movielens.db.zip
1531
# compile the edgeRec and put it in the current directory
1632
GOBIN=`pwd` go install github.com/auxten/edgeRec@latest && \
@@ -161,6 +177,7 @@ To make this project work, quite a lot of code are copied and modified from the
161177
162178
# Papers related
163179
180+
- [YouTube DNN](https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/45530.pdf)
164181
- [Deep Interest Network for Click-Through Rate Prediction](https://arxiv.org/abs/1706.06978)
165182
- [Document Embedding with Paragraph Vectors](https://arxiv.org/abs/1507.07998)
166183
- [EdgeRec: Recommender System on Edge in Mobile Taobao](https://arxiv.org/abs/2005.08416) // this project is not an exact implementation of the paper

example/movielens/dinimpl.go

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
package movielens
2+
3+
import (
4+
"fmt"
5+
6+
"github.com/auxten/edgeRec/model/din"
7+
rcmd "github.com/auxten/edgeRec/recommend"
8+
log "github.com/sirupsen/logrus"
9+
"gonum.org/v1/gonum/mat"
10+
"gorgonia.org/tensor"
11+
)
12+
13+
type dinImpl struct {
14+
uProfileDim int
15+
uBehaviorSize int
16+
uBehaviorDim int
17+
iFeatureDim int
18+
cFeatureDim int
19+
20+
predBatchSize int
21+
batchSize, epochs int
22+
sampleInfo *rcmd.SampleInfo
23+
24+
// stop training on earlyStop count of no cost improvement
25+
// 0 means no early stop
26+
earlyStop int
27+
28+
learner *din.DinNet
29+
pred *din.DinNet
30+
}
31+
32+
func (d *dinImpl) Predict(X mat.Matrix, Y mat.Mutable) *mat.Dense {
33+
numPred, _ := X.Dims()
34+
inputTensor := tensor.New(tensor.WithShape(X.Dims()), tensor.WithBacking(X.(*mat.Dense).RawMatrix().Data))
35+
y, err := din.Predict(d.pred, numPred, d.predBatchSize, d.sampleInfo, inputTensor)
36+
if err != nil {
37+
log.Errorf("predict din model failed: %v", err)
38+
return nil
39+
}
40+
yDense := mat.NewDense(numPred, 1, y)
41+
if Y != nil {
42+
Y.(*mat.Dense).SetRawMatrix(yDense.RawMatrix())
43+
}
44+
45+
return yDense
46+
}
47+
48+
func (d *dinImpl) Fit(trainSample *rcmd.TrainSample) (pred rcmd.PredictAbstract, err error) {
49+
d.uProfileDim = trainSample.Info.UserProfileRange[1] - trainSample.Info.UserProfileRange[0]
50+
d.uBehaviorSize = rcmd.UserBehaviorLen
51+
d.uBehaviorDim = rcmd.ItemEmbDim
52+
d.iFeatureDim = rcmd.ItemEmbDim
53+
d.cFeatureDim = trainSample.Info.CtxFeatureRange[1] - trainSample.Info.CtxFeatureRange[0]
54+
d.sampleInfo = &trainSample.Info
55+
56+
sampleLen := len(trainSample.Data)
57+
X := mat.NewDense(sampleLen, len(trainSample.Data[0].Input), nil)
58+
for i, sample := range trainSample.Data {
59+
X.SetRow(i, sample.Input)
60+
}
61+
Y := mat.NewDense(sampleLen, 1, nil)
62+
for i, sample := range trainSample.Data {
63+
Y.Set(i, 0, sample.Response[0])
64+
}
65+
66+
d.learner = din.NewDinNet(d.uProfileDim, d.uBehaviorSize, d.uBehaviorDim, d.iFeatureDim, d.cFeatureDim)
67+
var (
68+
inputs, labels tensor.Tensor
69+
numExamples, _ = X.Dims()
70+
numLabels, _ = Y.Dims()
71+
)
72+
if numExamples != numLabels {
73+
err = fmt.Errorf("number of examples and labels do not match")
74+
return
75+
}
76+
77+
inputs = tensor.New(tensor.WithShape(X.Dims()), tensor.WithBacking(X.RawMatrix().Data))
78+
labels = tensor.New(tensor.WithShape(Y.Dims()), tensor.WithBacking(Y.RawMatrix().Data))
79+
err = din.Train(d.uProfileDim, d.uBehaviorSize, d.uBehaviorDim, d.iFeatureDim, d.cFeatureDim,
80+
numExamples, d.batchSize, d.epochs, d.earlyStop,
81+
d.sampleInfo,
82+
inputs, labels,
83+
d.learner,
84+
)
85+
if err != nil {
86+
log.Errorf("train din model failed: %v", err)
87+
return
88+
}
89+
dinJson, err := d.learner.Marshal()
90+
if err != nil {
91+
log.Errorf("marshal din model failed: %v", err)
92+
return
93+
}
94+
dinPred, err := din.NewDinNetFromJson(dinJson)
95+
if err != nil {
96+
log.Errorf("new din model from json failed: %v", err)
97+
return
98+
}
99+
err = din.InitForwardOnlyVm(d.uProfileDim, d.uBehaviorSize, d.uBehaviorDim, d.iFeatureDim, d.cFeatureDim,
100+
d.predBatchSize, dinPred)
101+
if err != nil {
102+
log.Errorf("init forward only vm failed: %v", err)
103+
return
104+
}
105+
d.pred = dinPred
106+
107+
return d, nil
108+
}

example/movielens/dinimpl_test.go

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
package movielens
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"math/rand"
7+
"testing"
8+
9+
"github.com/auxten/edgeRec/nn/metrics"
10+
rcmd "github.com/auxten/edgeRec/recommend"
11+
. "github.com/smartystreets/goconvey/convey"
12+
"gonum.org/v1/gonum/mat"
13+
)
14+
15+
type dinPredictor struct {
16+
rcmd.PreRanker
17+
rcmd.Predictor
18+
rcmd.UserBehavior
19+
}
20+
21+
func TestDinOnMovielens(t *testing.T) {
22+
rand.Seed(42)
23+
24+
var (
25+
movielens = &MovielensRec{
26+
DataPath: "movielens.db",
27+
SampleCnt: 79948,
28+
//SampleCnt: 10000,
29+
}
30+
model rcmd.Predictor
31+
err error
32+
)
33+
34+
Convey("Train din model", t, func() {
35+
dinModel := &dinImpl{
36+
predBatchSize: 100,
37+
batchSize: 200,
38+
epochs: 100,
39+
earlyStop: 20,
40+
}
41+
trainCtx := context.Background()
42+
model, err = rcmd.Train(trainCtx, movielens, dinModel)
43+
So(err, ShouldBeNil)
44+
So(model, ShouldNotBeNil)
45+
})
46+
47+
Convey("Predict din model", t, func() {
48+
testCount := 20600
49+
rows, err := db.Query(
50+
"SELECT userId, movieId, rating, timestamp FROM ratings_test ORDER BY timestamp, userId ASC LIMIT ?", testCount)
51+
So(err, ShouldBeNil)
52+
var (
53+
userId int
54+
itemId int
55+
rating float64
56+
timestamp int64
57+
yTrue = mat.NewDense(testCount, 1, nil)
58+
sampleKeys = make([]rcmd.Sample, 0, testCount)
59+
)
60+
for i := 0; rows.Next(); i++ {
61+
err = rows.Scan(&userId, &itemId, &rating, &timestamp)
62+
if err != nil {
63+
t.Errorf("scan error: %v", err)
64+
}
65+
yTrue.Set(i, 0, BinarizeLabel(rating))
66+
sampleKeys = append(sampleKeys, rcmd.Sample{userId, itemId, 0, timestamp})
67+
}
68+
batchPredictCtx := context.Background()
69+
dinPred := &dinPredictor{
70+
PreRanker: movielens,
71+
Predictor: model,
72+
UserBehavior: movielens,
73+
}
74+
yPred, err := rcmd.BatchPredict(batchPredictCtx, dinPred, sampleKeys)
75+
So(err, ShouldBeNil)
76+
rocAuc := metrics.ROCAUCScore(yTrue, yPred, "", nil)
77+
rowCount, _ := yTrue.Dims()
78+
fmt.Printf("rocAuc on test set %d: %f\n", rowCount, rocAuc)
79+
})
80+
}

0 commit comments

Comments
 (0)