From 5fee62e3c51ff9b7629cf58adea3bdc2b134cdd7 Mon Sep 17 00:00:00 2001 From: auxten Date: Mon, 31 Oct 2022 18:13:52 +0800 Subject: [PATCH 1/4] Use float32 to reduce mem usage --- example/futuresales/main_test.go | 9 +- example/movielens/dinimpl.go | 41 +--- example/movielens/dinimpl_test.go | 15 +- example/movielens/feature.go | 35 +-- example/movielens/feature_test.go | 20 +- example/movielens/mlp.go | 43 +++- example/movielens/mlpimpl.go | 39 +-- example/movielens/mlpimpl_test.go | 21 +- feature/embedding/model/model.go | 1 + feature/embedding/model/word2vec/word2vec.go | 47 +++- feature/multihot.go | 11 + model/din/activation_test.go | 30 +-- model/din/din.go | 48 ++-- model/din/model.go | 67 +++-- model/din/model_test.go | 28 +-- model/din/simplemlp.go | 30 +-- ps/batch.go | 187 -------------- ps/batch_test.go | 39 --- ps/printer.go | 68 ------ ps/sample.go | 58 ----- ps/sample_test.go | 39 --- ps/solver.go | 95 -------- ps/trainer.go | 109 --------- ps/trainer_test.go | 244 ------------------- recommend/rcmd.go | 116 +++++---- utils/util.go | 8 + 26 files changed, 361 insertions(+), 1087 deletions(-) delete mode 100644 ps/batch.go delete mode 100644 ps/batch_test.go delete mode 100644 ps/printer.go delete mode 100644 ps/sample.go delete mode 100644 ps/sample_test.go delete mode 100644 ps/solver.go delete mode 100644 ps/trainer.go delete mode 100644 ps/trainer_test.go diff --git a/example/futuresales/main_test.go b/example/futuresales/main_test.go index ebb8521..544f662 100644 --- a/example/futuresales/main_test.go +++ b/example/futuresales/main_test.go @@ -13,6 +13,7 @@ import ( "github.com/auxten/edgeRec/feature" "github.com/auxten/edgeRec/nn" "github.com/auxten/edgeRec/ps" + "github.com/auxten/edgeRec/recommend" "github.com/auxten/edgeRec/schema" "github.com/auxten/edgeRec/utils" log "github.com/sirupsen/logrus" @@ -84,7 +85,7 @@ func TestTrain(t *testing.T) { fmt.Printf("training data count: %d\n", trainCount) // training - trainSample := make(ps.Samples, trainCount) + trainSample := make(recommend.Samples, trainCount) { trainRows, err := scanner.GetRows(fmt.Sprintf(`select date, date_block_num, shop_id, s.item_id, item_price, item_category_id, item_name, item_cnt_day from sales_train s left join items i on s.item_id = i.item_id limit %d`, trainCount)) @@ -104,7 +105,7 @@ func TestTrain(t *testing.T) { if err != nil { log.Fatal(err) } - trainSample[i] = ps.Sample{ + trainSample[i] = recommend.Sample{ Input: featureTransform(date, date_block_num, shop_id, item_id, item_price, item_category_id.Float64, item_name.String), Response: []float64{outputTransform(item_cnt_day)}} i++ @@ -118,7 +119,7 @@ func TestTrain(t *testing.T) { // 10% test data fmt.Printf("test data count: %d\n", testCount) i = 0 - testSample := make(ps.Samples, testCount) + testSample := make(recommend.Samples, testCount) rows, err := scanner.GetRows(fmt.Sprintf(`select date, date_block_num, shop_id, s.item_id, item_price, item_category_id, item_name, item_cnt_day from sales_train s left join items i on s.item_id = i.item_id limit %d, %d`, trainCount, testCount)) @@ -138,7 +139,7 @@ func TestTrain(t *testing.T) { if err != nil { log.Fatal(err) } - testSample[i] = ps.Sample{ + testSample[i] = recommend.Sample{ Input: featureTransform(date, date_block_num, shop_id, item_id, item_price, item_category_id.Float64, item_name.String), Response: []float64{outputTransform(item_cnt_day)}} i++ diff --git a/example/movielens/dinimpl.go b/example/movielens/dinimpl.go index 634ab46..d816869 100644 --- a/example/movielens/dinimpl.go +++ b/example/movielens/dinimpl.go @@ -6,7 +6,6 @@ import ( "github.com/auxten/edgeRec/model/din" rcmd "github.com/auxten/edgeRec/recommend" log "github.com/sirupsen/logrus" - "gonum.org/v1/gonum/mat" "gorgonia.org/tensor" ) @@ -29,18 +28,14 @@ type dinImpl struct { pred *din.DinNet } -func (d *dinImpl) Predict(X mat.Matrix, Y mat.Mutable) *mat.Dense { - numPred, _ := X.Dims() - inputTensor := tensor.New(tensor.WithShape(X.Dims()), tensor.WithBacking(X.(*mat.Dense).RawMatrix().Data)) - y, err := din.Predict(d.pred, numPred, d.predBatchSize, d.sampleInfo, inputTensor) +func (d *dinImpl) Predict(X tensor.Tensor) tensor.Tensor { + numPred := X.Shape()[0] + y, err := din.Predict(d.pred, numPred, d.predBatchSize, d.sampleInfo, X) if err != nil { log.Errorf("predict din model failed: %v", err) return nil } - yDense := mat.NewDense(numPred, 1, y) - if Y != nil { - Y.(*mat.Dense).SetRawMatrix(yDense.RawMatrix()) - } + yDense := tensor.NewDense(din.DT, tensor.Shape{numPred, 1}, tensor.WithBacking(y)) return yDense } @@ -53,31 +48,19 @@ func (d *dinImpl) Fit(trainSample *rcmd.TrainSample) (pred rcmd.PredictAbstract, d.cFeatureDim = trainSample.Info.CtxFeatureRange[1] - trainSample.Info.CtxFeatureRange[0] d.sampleInfo = &trainSample.Info - sampleLen := len(trainSample.Data) - X := mat.NewDense(sampleLen, len(trainSample.Data[0].Input), nil) - for i, sample := range trainSample.Data { - X.SetRow(i, sample.Input) - } - Y := mat.NewDense(sampleLen, 1, nil) - for i, sample := range trainSample.Data { - Y.Set(i, 0, sample.Response[0]) + if trainSample.Rows != len(trainSample.Y) { + err = fmt.Errorf("number of examples %d and labels %d do not match", + trainSample.Rows, len(trainSample.Y)) + return } + inputs := tensor.New(tensor.WithShape(trainSample.Rows, trainSample.XCols), tensor.WithBacking(trainSample.X)) + labels := tensor.New(tensor.WithShape(trainSample.Rows, 1), tensor.WithBacking(trainSample.Y)) + d.learner = din.NewDinNet(d.uProfileDim, d.uBehaviorSize, d.uBehaviorDim, d.iFeatureDim, d.cFeatureDim) - var ( - inputs, labels tensor.Tensor - numExamples, _ = X.Dims() - numLabels, _ = Y.Dims() - ) - if numExamples != numLabels { - err = fmt.Errorf("number of examples and labels do not match") - return - } - inputs = tensor.New(tensor.WithShape(X.Dims()), tensor.WithBacking(X.RawMatrix().Data)) - labels = tensor.New(tensor.WithShape(Y.Dims()), tensor.WithBacking(Y.RawMatrix().Data)) err = din.Train(d.uProfileDim, d.uBehaviorSize, d.uBehaviorDim, d.iFeatureDim, d.cFeatureDim, - numExamples, d.batchSize, d.epochs, d.earlyStop, + trainSample.Rows, d.batchSize, d.epochs, d.earlyStop, d.sampleInfo, inputs, labels, d.learner, diff --git a/example/movielens/dinimpl_test.go b/example/movielens/dinimpl_test.go index 6ceee76..6e63c85 100644 --- a/example/movielens/dinimpl_test.go +++ b/example/movielens/dinimpl_test.go @@ -6,10 +6,9 @@ import ( "math/rand" "testing" - "github.com/auxten/edgeRec/nn/metrics" + "github.com/auxten/edgeRec/model/din" rcmd "github.com/auxten/edgeRec/recommend" . "github.com/smartystreets/goconvey/convey" - "gonum.org/v1/gonum/mat" ) type dinPredictor struct { @@ -52,9 +51,9 @@ func TestDinOnMovielens(t *testing.T) { var ( userId int itemId int - rating float64 + rating float32 timestamp int64 - yTrue = mat.NewDense(testCount, 1, nil) + yTrue []float32 sampleKeys = make([]rcmd.Sample, 0, testCount) ) for i := 0; rows.Next(); i++ { @@ -62,7 +61,8 @@ func TestDinOnMovielens(t *testing.T) { if err != nil { t.Errorf("scan error: %v", err) } - yTrue.Set(i, 0, BinarizeLabel(rating)) + //yTrue.Set(i, 0, BinarizeLabel(rating)) + yTrue = append(yTrue, BinarizeLabel32(rating)) sampleKeys = append(sampleKeys, rcmd.Sample{userId, itemId, 0, timestamp}) } batchPredictCtx := context.Background() @@ -73,8 +73,9 @@ func TestDinOnMovielens(t *testing.T) { } yPred, err := rcmd.BatchPredict(batchPredictCtx, dinPred, sampleKeys) So(err, ShouldBeNil) - rocAuc := metrics.ROCAUCScore(yTrue, yPred, "", nil) - rowCount, _ := yTrue.Dims() + rocAuc := din.RocAuc32(yPred.Data().([]float32), yTrue) + //rocAuc := metrics.ROCAUCScore(yTrue, yPred, "", nil) + rowCount := len(yTrue) fmt.Printf("rocAuc on test set %d: %f\n", rowCount, rocAuc) }) } diff --git a/example/movielens/feature.go b/example/movielens/feature.go index 7635824..361dfe6 100644 --- a/example/movielens/feature.go +++ b/example/movielens/feature.go @@ -39,7 +39,7 @@ func initDb(dbPath string) (err error) { type MovielensRec struct { DataPath string SampleCnt int - mRatingMap map[int][2]float64 + mRatingMap map[int][2]float32 ubcTrain *ubcache.UserBehaviorCache ubcPredict *ubcache.UserBehaviorCache } @@ -103,8 +103,8 @@ func (recSys *MovielensRec) GetItemFeature(ctx context.Context, itemId int) (ten var ( movieYear int itemTitle, itemGenres string - avgRating, cntRating float64 - GenreTensor [50]float64 // 5 * 10 + avgRating, cntRating float32 + GenreTensor [50]float32 // 5 * 10 ) if err = rows.Scan(&itemTitle, &itemGenres); err != nil { log.Errorf("failed to scan item %d: %v", itemId, err) @@ -129,11 +129,11 @@ func (recSys *MovielensRec) GetItemFeature(ctx context.Context, itemId int) (ten } if mr, ok := recSys.mRatingMap[itemId]; ok { avgRating = mr[0] / 5. - cntRating = math.Log2(mr[1]) + cntRating = float32(math.Log2(float64(mr[1]))) } - tensor = utils.ConcatSlice(tensor, GenreTensor[:], rcmd.Tensor{ - float64(movieYear-1990) / 20.0, avgRating, cntRating, + tensor = utils.ConcatSlice32(tensor, GenreTensor[:], rcmd.Tensor{ + float32(movieYear-1990) / 20.0, avgRating, cntRating, }) return } else { @@ -149,7 +149,7 @@ func (recSys *MovielensRec) GetUserFeature(ctx context.Context, userId int) (ten ugenres sql.NullString avgRating sql.NullFloat64 cntRating sql.NullFloat64 - top5GenresTensor [50]float64 + top5GenresTensor [50]float32 ) // get stage value from ctx stage := ctx.Value(rcmd.StageKey).(rcmd.Stage) @@ -183,7 +183,7 @@ func (recSys *MovielensRec) GetUserFeature(ctx context.Context, userId int) (ten for i, genre := range top5Genres { copy(top5GenresTensor[i*10:], genreFeature(genre.Key)) } - tensor = utils.ConcatSlice(rcmd.Tensor{avgRating.Float64 / 5., cntRating.Float64 / 100.}, top5GenresTensor[:]) + tensor = utils.ConcatSlice32(rcmd.Tensor{float32(avgRating.Float64) / 5., float32(cntRating.Float64) / 100.}, top5GenresTensor[:]) if rcmd.DebugItemId != 0 && userId == rcmd.DebugUserId { log.Infof("user %d: %v ", userId, tensor) } @@ -196,7 +196,7 @@ func (recSys *MovielensRec) GetUserFeature(ctx context.Context, userId int) (ten } func genreFeature(genre string) (tensor rcmd.Tensor) { - return feature.HashOneHot([]byte(genre), 10) + return feature.HashOneHot32([]byte(genre), 10) } func (recSys *MovielensRec) SampleGenerator(_ context.Context) (ret <-chan rcmd.Sample, err error) { @@ -228,14 +228,14 @@ func (recSys *MovielensRec) SampleGenerator(_ context.Context) (ret <-chan rcmd. i++ var ( userId, movieId int - rating, label float64 + rating, label float32 timestamp int64 ) if err = rows.Scan(&userId, &movieId, &rating, ×tamp); err != nil { log.Errorf("failed to scan ratings: %v", err) return } - label = BinarizeLabel(rating) + label = BinarizeLabel32(rating) // label = rating / 5.0 sampleCh <- rcmd.Sample{ @@ -268,18 +268,18 @@ func (recSys *MovielensRec) PreTrain(ctx context.Context) (err error) { return } defer rows1.Close() - recSys.mRatingMap = make(map[int][2]float64) + recSys.mRatingMap = make(map[int][2]float32) for rows1.Next() { var ( movieId int - avgR float64 + avgR float32 cntR int ) if err = rows1.Scan(&movieId, &avgR, &cntR); err != nil { log.Errorf("failed to scan movieId: %v", err) return } - recSys.mRatingMap[movieId] = [2]float64{avgR, float64(cntR)} + recSys.mRatingMap[movieId] = [2]float32{avgR, float32(cntR)} } // fill user behavior cache @@ -389,3 +389,10 @@ func BinarizeLabel(rating float64) float64 { } return 0.0 } + +func BinarizeLabel32(rating float32) float32 { + if rating > 3.5 { + return 1.0 + } + return 0.0 +} diff --git a/example/movielens/feature_test.go b/example/movielens/feature_test.go index 46ea86d..6d05e88 100644 --- a/example/movielens/feature_test.go +++ b/example/movielens/feature_test.go @@ -46,7 +46,7 @@ func TestFeatureEngineer(t *testing.T) { testData := []struct { userId int itemId int - expected float64 + expected float32 }{ {8, 527, 1.}, {8, 432, 0.}, @@ -68,8 +68,8 @@ func TestFeatureEngineer(t *testing.T) { fmt.Printf("userId:%d, itemId:%d, expected:%f, pred:%f\n", test.userId, test.itemId, test.expected, score[0].Score) //So(pred.At(0, 0), ShouldAlmostEqual, test.expected) - yTrue.Set(i, 0, test.expected) - yPred.Set(i, 0, score[0].Score) + yTrue.Set(i, 0, float64(test.expected)) + yPred.Set(i, 0, float64(score[0].Score)) } rocAuc := metrics.ROCAUCScore(yTrue, yPred, "", nil) @@ -84,9 +84,10 @@ func TestFeatureEngineer(t *testing.T) { var ( userId int itemId int - rating float64 + rating float32 timestamp int64 yTrue = mat.NewDense(testCount, 1, nil) + yPredDense = mat.NewDense(testCount, 1, nil) sampleKeys = make([]rcmd.Sample, 0, testCount) ) for i := 0; rows.Next(); i++ { @@ -94,13 +95,20 @@ func TestFeatureEngineer(t *testing.T) { if err != nil { t.Errorf("scan error: %v", err) } - yTrue.Set(i, 0, BinarizeLabel(rating)) + yTrue.Set(i, 0, BinarizeLabel(float64(rating))) sampleKeys = append(sampleKeys, rcmd.Sample{userId, itemId, 0, timestamp}) } batchPredictCtx := context.Background() yPred, err := rcmd.BatchPredict(batchPredictCtx, model, sampleKeys) So(err, ShouldBeNil) - rocAuc := metrics.ROCAUCScore(yTrue, yPred, "", nil) + for i := 0; i < testCount; i++ { + val, err := yPred.At(i, 0) + if err != nil { + t.Errorf("yPred.At error: %v", err) + } + yPredDense.Set(i, 0, float64(val.(float32))) + } + rocAuc := metrics.ROCAUCScore(yTrue, yPredDense, "", nil) rowCount, _ := yTrue.Dims() fmt.Printf("rocAuc on test set %d: %f\n", rowCount, rocAuc) }) diff --git a/example/movielens/mlp.go b/example/movielens/mlp.go index 5714bb5..94542f4 100644 --- a/example/movielens/mlp.go +++ b/example/movielens/mlp.go @@ -5,14 +5,37 @@ import ( nn "github.com/auxten/edgeRec/nn/neural_network" rcmd "github.com/auxten/edgeRec/recommend" "gonum.org/v1/gonum/mat" + "gorgonia.org/tensor" ) type predWrap struct { pred base.Predicter } -func (p *predWrap) Predict(X mat.Matrix, Y mat.Mutable) *mat.Dense { - return p.pred.Predict(X, Y) +func (p *predWrap) Predict(X tensor.Tensor) tensor.Tensor { + numPred := X.Shape()[0] + xWidth := X.Shape()[1] + //convert float32 tensor to float64 mat.Dense + xDense := mat.NewDense(numPred, X.Shape()[1], nil) + for i := 0; i < numPred; i++ { + for j := 0; j < xWidth; j++ { + val, err := X.At(i, j) + if err != nil { + return nil + } + xDense.Set(i, j, float64(val.(float32))) + } + } + yMutable := mat.NewDense(numPred, 1, nil) + p.pred.Predict(xDense, yMutable) + + //convert float64 mat.Dense to float32 tensor + y := make([]float32, numPred) + for i := 0; i < numPred; i++ { + val := yMutable.At(i, 0) + y[i] = float32(val) + } + return tensor.NewDense(tensor.Float32, tensor.Shape{numPred, 1}, tensor.WithBacking(y)) } type fitWrap struct { @@ -20,14 +43,18 @@ type fitWrap struct { } func (fit *fitWrap) Fit(trainSample *rcmd.TrainSample) (rcmd.PredictAbstract, error) { - sampleLen := len(trainSample.Data) - sampleDense := mat.NewDense(sampleLen, len(trainSample.Data[0].Input), nil) - for i, sample := range trainSample.Data { - sampleDense.SetRow(i, sample.Input) + sampleLen := trainSample.Rows + x64 := make([]float64, sampleLen*trainSample.XCols) + for i := 0; i < sampleLen; i++ { + for j := 0; j < trainSample.XCols; j++ { + x64[i*trainSample.XCols+j] = float64(trainSample.X[i*trainSample.XCols+j]) + } } + sampleDense := mat.NewDense(sampleLen, trainSample.XCols, x64) + yClass := mat.NewDense(sampleLen, 1, nil) - for i, sample := range trainSample.Data { - yClass.Set(i, 0, sample.Response[0]) + for i, sample := range trainSample.Y { + yClass.Set(i, 0, float64(sample)) } pred := fit.model.Fit(sampleDense, yClass) diff --git a/example/movielens/mlpimpl.go b/example/movielens/mlpimpl.go index b46a013..3280636 100644 --- a/example/movielens/mlpimpl.go +++ b/example/movielens/mlpimpl.go @@ -6,7 +6,6 @@ import ( "github.com/auxten/edgeRec/model/din" rcmd "github.com/auxten/edgeRec/recommend" log "github.com/sirupsen/logrus" - "gonum.org/v1/gonum/mat" "gorgonia.org/tensor" ) @@ -29,18 +28,14 @@ type mlpImpl struct { pred *din.SimpleMLP } -func (d *mlpImpl) Predict(X mat.Matrix, Y mat.Mutable) *mat.Dense { - numPred, _ := X.Dims() - inputTensor := tensor.New(tensor.WithShape(X.Dims()), tensor.WithBacking(X.(*mat.Dense).RawMatrix().Data)) - y, err := din.Predict(d.pred, numPred, d.predBatchSize, d.sampleInfo, inputTensor) +func (d *mlpImpl) Predict(X tensor.Tensor) tensor.Tensor { + numPred := X.Shape()[0] + y, err := din.Predict(d.pred, numPred, d.predBatchSize, d.sampleInfo, X) if err != nil { log.Errorf("predict din model failed: %v", err) return nil } - yDense := mat.NewDense(numPred, 1, y) - if Y != nil { - Y.(*mat.Dense).SetRawMatrix(yDense.RawMatrix()) - } + yDense := tensor.NewDense(din.DT, tensor.Shape{numPred, 1}, tensor.WithBacking(y)) return yDense } @@ -53,31 +48,17 @@ func (d *mlpImpl) Fit(trainSample *rcmd.TrainSample) (pred rcmd.PredictAbstract, d.cFeatureDim = trainSample.Info.CtxFeatureRange[1] - trainSample.Info.CtxFeatureRange[0] d.sampleInfo = &trainSample.Info - sampleLen := len(trainSample.Data) - X := mat.NewDense(sampleLen, len(trainSample.Data[0].Input), nil) - for i, sample := range trainSample.Data { - X.SetRow(i, sample.Input) - } - Y := mat.NewDense(sampleLen, 1, nil) - for i, sample := range trainSample.Data { - Y.Set(i, 0, sample.Response[0]) - } - - d.learner = din.NewSimpleMLP(d.uProfileDim, d.uBehaviorSize, d.uBehaviorDim, d.iFeatureDim, d.cFeatureDim) - var ( - inputs, labels tensor.Tensor - numExamples, _ = X.Dims() - numLabels, _ = Y.Dims() - ) - if numExamples != numLabels { + if trainSample.Rows != len(trainSample.Y) { err = fmt.Errorf("number of examples and labels do not match") return } - inputs = tensor.New(tensor.WithShape(X.Dims()), tensor.WithBacking(X.RawMatrix().Data)) - labels = tensor.New(tensor.WithShape(Y.Dims()), tensor.WithBacking(Y.RawMatrix().Data)) + d.learner = din.NewSimpleMLP(d.uProfileDim, d.uBehaviorSize, d.uBehaviorDim, d.iFeatureDim, d.cFeatureDim) + + inputs := tensor.New(tensor.WithShape(trainSample.Rows, trainSample.XCols), tensor.WithBacking(trainSample.X)) + labels := tensor.New(tensor.WithShape(trainSample.Rows, 1), tensor.WithBacking(trainSample.Y)) err = din.Train(d.uProfileDim, d.uBehaviorSize, d.uBehaviorDim, d.iFeatureDim, d.cFeatureDim, - numExamples, d.batchSize, d.epochs, d.earlyStop, + trainSample.Rows, d.batchSize, d.epochs, d.earlyStop, d.sampleInfo, inputs, labels, d.learner, diff --git a/example/movielens/mlpimpl_test.go b/example/movielens/mlpimpl_test.go index c9be287..c203e59 100644 --- a/example/movielens/mlpimpl_test.go +++ b/example/movielens/mlpimpl_test.go @@ -6,10 +6,9 @@ import ( "math/rand" "testing" - "github.com/auxten/edgeRec/nn/metrics" + "github.com/auxten/edgeRec/model/din" rcmd "github.com/auxten/edgeRec/recommend" . "github.com/smartystreets/goconvey/convey" - "gonum.org/v1/gonum/mat" ) func TestSimpleMLPOnMovielens(t *testing.T) { @@ -49,9 +48,9 @@ func TestSimpleMLPOnMovielens(t *testing.T) { var ( userId int itemId int - rating float64 + rating float32 timestamp int64 - yTrue = mat.NewDense(testCount, 1, nil) + yTrue []float32 sampleKeys = make([]rcmd.Sample, 0, testCount) ) for i := 0; rows.Next(); i++ { @@ -59,14 +58,20 @@ func TestSimpleMLPOnMovielens(t *testing.T) { if err != nil { t.Errorf("scan error: %v", err) } - yTrue.Set(i, 0, BinarizeLabel(rating)) + //yTrue.Set(i, 0, BinarizeLabel(rating)) + yTrue = append(yTrue, BinarizeLabel32(rating)) sampleKeys = append(sampleKeys, rcmd.Sample{userId, itemId, 0, timestamp}) } batchPredictCtx := context.Background() - yPred, err := rcmd.BatchPredict(batchPredictCtx, model, sampleKeys) + dinPred := &dinPredictor{ + PreRanker: movielens, + Predictor: model, + UserBehavior: movielens, + } + yPred, err := rcmd.BatchPredict(batchPredictCtx, dinPred, sampleKeys) So(err, ShouldBeNil) - rocAuc := metrics.ROCAUCScore(yTrue, yPred, "", nil) - rowCount, _ := yTrue.Dims() + rocAuc := din.RocAuc32(yPred.Data().([]float32), yTrue) + rowCount := len(yTrue) fmt.Printf("rocAuc on test set %d: %f\n", rowCount, rocAuc) }) } diff --git a/feature/embedding/model/model.go b/feature/embedding/model/model.go index ff187b9..aa73095 100644 --- a/feature/embedding/model/model.go +++ b/feature/embedding/model/model.go @@ -26,5 +26,6 @@ type Model interface { Save(io.Writer, vector.Type) error WordVector(vector.Type) *matrix.Matrix GenEmbeddingMap() (map[string][]float64, error) + GenEmbeddingMap32() (map[string][]float32, error) EmbeddingByWord(word string) ([]float64, bool) } diff --git a/feature/embedding/model/word2vec/word2vec.go b/feature/embedding/model/word2vec/word2vec.go index 118a25f..c1b6718 100644 --- a/feature/embedding/model/word2vec/word2vec.go +++ b/feature/embedding/model/word2vec/word2vec.go @@ -42,23 +42,30 @@ type word2vec struct { corpus corpus.Corpus - param *matrix.Matrix - subsampler *subsample.Subsampler - currentlr float64 - mod mod - optimizer optimizer - embeddingMap EmbeddingMap + param *matrix.Matrix + subsampler *subsample.Subsampler + currentlr float64 + mod mod + optimizer optimizer + embeddingMap EmbeddingMap + embeddingMap32 EmbeddingMap32 verbose *verbose.Verbose } type EmbeddingMap map[string][]float64 +type EmbeddingMap32 map[string][]float32 func (m *EmbeddingMap) Get(word string) ([]float64, bool) { vec, ok := (*m)[word] return vec, ok } +func (m *EmbeddingMap32) Get(word string) ([]float32, bool) { + vec, ok := (*m)[word] + return vec, ok +} + func New(opts ...ModelOption) (model.Model, error) { options := DefaultOptions() for _, fn := range opts { @@ -288,6 +295,34 @@ func (w *word2vec) GenEmbeddingMap() (embMap map[string][]float64, err error) { return w.embeddingMap, nil } +func (w *word2vec) GenEmbeddingMap32() (embMap map[string][]float32, err error) { + dict := w.corpus.Dictionary() + wordVec := w.WordVector(vector.Agg) + if dict.Len() != wordVec.Row() { + err = fmt.Errorf("dictionary and word vector size mismatch") + return + } + if dict.Len() == 0 { + err = fmt.Errorf("dictionary is empty") + return + } + + w.embeddingMap32 = make(map[string][]float32) + clk := clock.New() + for i := 0; i < dict.Len(); i++ { + word, _ := dict.Word(i) + vec := wordVec.Slice(i) + w.embeddingMap32[word] = make([]float32, len(vec)) + for j := 0; j < len(vec); j++ { + w.embeddingMap32[word][j] = float32(vec[j]) + } + } + + log.Debugf("embedding map size %d created %v", len(w.embeddingMap32), clk.AllElapsed()) + + return w.embeddingMap32, nil +} + func LoadEmbeddingMap(f io.Reader) (embMap map[string][]float64, err error) { var ( embeddings emb.Embeddings diff --git a/feature/multihot.go b/feature/multihot.go index 63cd20f..301012e 100644 --- a/feature/multihot.go +++ b/feature/multihot.go @@ -23,6 +23,17 @@ func HashOneHot(buf []byte, size int) []float64 { return result } +func HashOneHot32(buf []byte, size int) []float32 { + result := make([]float32, size) + hash := fnv.New32() + _, err := hash.Write(buf) + if err != nil { + return nil + } + result[int(hash.Sum32())%size] = 1 + return result +} + func StringSplitMultiHot(str string, sep string, size int) []float64 { result := make([]float64, size) for _, s := range strings.Split(str, sep) { diff --git a/model/din/activation_test.go b/model/din/activation_test.go index 0608116..2a03a2a 100644 --- a/model/din/activation_test.go +++ b/model/din/activation_test.go @@ -11,8 +11,8 @@ import ( func TestPRelu(t *testing.T) { Convey("prelu", t, func() { g := G.NewGraph() - x := G.NodeFromAny(g, tensor.New(tensor.WithShape(4, 1), tensor.WithBacking([]float64{-1, -2, 3, 4})), G.WithName("x")) - a := G.NewScalar(g, G.Float64, G.WithValue(0.1), G.WithName("a")) + x := G.NodeFromAny(g, tensor.New(tensor.WithShape(4, 1), tensor.WithBacking([]float32{-1, -2, 3, 4})), G.WithName("x")) + a := G.NewScalar(g, G.Float32, G.WithValue(0.1), G.WithName("a")) output := PRelu(x, a) //cost := G.Must(G.Mean(output)) // @@ -24,15 +24,15 @@ func TestPRelu(t *testing.T) { t.Fatalf("%+v", err) } defer m.Close() - So(output.Value().Data(), ShouldResemble, []float64{-0.1, -0.2, 3, 4}) + So(output.Value().Data(), ShouldResemble, []float32{-0.1, -0.2, 3, 4}) }) } func TestEucDistance(t *testing.T) { Convey("euc distance 2 dim", t, func() { g := G.NewGraph() - x := G.NodeFromAny(g, tensor.New(tensor.WithShape(2, 4), tensor.WithBacking([]float64{1, 2, -3, 4, -1, 0, -1, 2})), G.WithName("x")) - y := G.NodeFromAny(g, tensor.New(tensor.WithShape(2, 4), tensor.WithBacking([]float64{1, 2, -3, 4, 0, 1, 0, 1})), G.WithName("y")) + x := G.NodeFromAny(g, tensor.New(tensor.WithShape(2, 4), tensor.WithBacking([]float32{1, 2, -3, 4, -1, 0, -1, 2})), G.WithName("x")) + y := G.NodeFromAny(g, tensor.New(tensor.WithShape(2, 4), tensor.WithBacking([]float32{1, 2, -3, 4, 0, 1, 0, 1})), G.WithName("y")) output := EucDistance(x, y) m := G.NewTapeMachine(g) if err := m.RunAll(); err != nil { @@ -40,12 +40,12 @@ func TestEucDistance(t *testing.T) { } defer m.Close() So([]int(output.Shape()), ShouldResemble, []int{2}) - So(output.Value().Data(), ShouldResemble, []float64{0, 2}) + So(output.Value().Data(), ShouldResemble, []float32{0, 2}) }) Convey("euc distance 3 dim no broadcast", t, func() { g := G.NewGraph() - x := G.NodeFromAny(g, tensor.New(tensor.WithShape(3, 2, 2), tensor.WithBacking([]float64{1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0})), G.WithName("x")) - y := G.NodeFromAny(g, tensor.New(tensor.WithShape(3, 2, 2), tensor.WithBacking([]float64{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0})), G.WithName("y")) + x := G.NodeFromAny(g, tensor.New(tensor.WithShape(3, 2, 2), tensor.WithBacking([]float32{1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0})), G.WithName("x")) + y := G.NodeFromAny(g, tensor.New(tensor.WithShape(3, 2, 2), tensor.WithBacking([]float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0})), G.WithName("y")) output := EucDistance(x, y) m := G.NewTapeMachine(g) if err := m.RunAll(); err != nil { @@ -53,12 +53,12 @@ func TestEucDistance(t *testing.T) { } defer m.Close() So([]int(output.Shape()), ShouldResemble, []int{3, 2}) - So(output.Value().Data(), ShouldResemble, []float64{1, 0, 1, 1, 0, 1}) + So(output.Value().Data(), ShouldResemble, []float32{1, 0, 1, 1, 0, 1}) }) Convey("euc distance 3 dim broadcast y", t, func() { g := G.NewGraph() - x := G.NodeFromAny(g, tensor.New(tensor.WithShape(3, 2, 2), tensor.WithBacking([]float64{0, 2, 0, 0, -1, 1, -1, 1, 1, 2, 2, 2})), G.WithName("x")) - y := G.NodeFromAny(g, tensor.New(tensor.WithShape(3, 1, 2), tensor.WithBacking([]float64{0, 1, 0, 1, 1, 2})), G.WithName("y")) + x := G.NodeFromAny(g, tensor.New(tensor.WithShape(3, 2, 2), tensor.WithBacking([]float32{0, 2, 0, 0, -1, 1, -1, 1, 1, 2, 2, 2})), G.WithName("x")) + y := G.NodeFromAny(g, tensor.New(tensor.WithShape(3, 1, 2), tensor.WithBacking([]float32{0, 1, 0, 1, 1, 2})), G.WithName("y")) output := EucDistance(x, y) m := G.NewTapeMachine(g) if err := m.RunAll(); err != nil { @@ -66,12 +66,12 @@ func TestEucDistance(t *testing.T) { } defer m.Close() So([]int(output.Shape()), ShouldResemble, []int{3, 2}) - So(output.Value().Data(), ShouldResemble, []float64{1, 1, 1, 1, 0, 1}) + So(output.Value().Data(), ShouldResemble, []float32{1, 1, 1, 1, 0, 1}) }) Convey("euc distance 3 dim broadcast x", t, func() { g := G.NewGraph() - x := G.NodeFromAny(g, tensor.New(tensor.WithShape(3, 1, 2), tensor.WithBacking([]float64{0, 1, 0, 1, 1, 2})), G.WithName("x")) - y := G.NodeFromAny(g, tensor.New(tensor.WithShape(3, 2, 2), tensor.WithBacking([]float64{0, 2, 0, 0, -1, 1, -1, 1, 1, 2, 2, 2})), G.WithName("y")) + x := G.NodeFromAny(g, tensor.New(tensor.WithShape(3, 1, 2), tensor.WithBacking([]float32{0, 1, 0, 1, 1, 2})), G.WithName("x")) + y := G.NodeFromAny(g, tensor.New(tensor.WithShape(3, 2, 2), tensor.WithBacking([]float32{0, 2, 0, 0, -1, 1, -1, 1, 1, 2, 2, 2})), G.WithName("y")) output := EucDistance(x, y) m := G.NewTapeMachine(g) if err := m.RunAll(); err != nil { @@ -79,6 +79,6 @@ func TestEucDistance(t *testing.T) { } defer m.Close() So([]int(output.Shape()), ShouldResemble, []int{3, 2}) - So(output.Value().Data(), ShouldResemble, []float64{1, 1, 1, 1, 0, 1}) + So(output.Value().Data(), ShouldResemble, []float32{1, 1, 1, 1, 0, 1}) }) } diff --git a/model/din/din.go b/model/din/din.go index 6a4a1d9..59c1c81 100644 --- a/model/din/din.go +++ b/model/din/din.go @@ -23,7 +23,7 @@ type DinNet struct { xUserProfile, xUbMatrix, xItemFeature, xCtxFeature *G.Node mlp0, mlp1, mlp2 *G.Node // weights of MLP layers - d0, d1 float64 // dropout probabilities + d0, d1 float32 // dropout probabilities att0 *G.Node // weights of attention layer //att1 *G.Node // weights of Attention layers @@ -36,11 +36,11 @@ type dinModel struct { UBehaviorDim int `json:"uBehaviorDim"` IFeatureDim int `json:"iFeatureDim"` CFeatureDim int `json:"cFeatureDim"` - Mlp0 []float64 `json:"mlp0"` - Mlp1 []float64 `json:"mlp1"` - Mlp2 []float64 `json:"mlp2"` - Att0 []float64 `json:"att0"` - //Att1 []float64 `json:"att1"` + Mlp0 []float32 `json:"mlp0"` + Mlp1 []float32 `json:"mlp1"` + Mlp2 []float32 `json:"mlp2"` + Att0 []float32 `json:"att0"` + //Att1 []float32 `json:"att1"` } func (din *DinNet) Vm() G.VM { @@ -58,11 +58,11 @@ func (din *DinNet) Marshal() (data []byte, err error) { UBehaviorDim: din.uBehaviorDim, IFeatureDim: din.iFeatureDim, CFeatureDim: din.cFeatureDim, - Mlp0: din.mlp0.Value().Data().([]float64), - Mlp1: din.mlp1.Value().Data().([]float64), - Mlp2: din.mlp2.Value().Data().([]float64), - Att0: din.att0.Value().Data().([]float64), - //Att1: din.att1.Value().Data().([]float64), + Mlp0: din.mlp0.Value().Data().([]float32), + Mlp1: din.mlp1.Value().Data().([]float32), + Mlp2: din.mlp2.Value().Data().([]float32), + Att0: din.att0.Value().Data().([]float32), + //Att1: din.att1.Value().Data().([]float32), } //marshal to json @@ -88,20 +88,20 @@ func NewDinNetFromJson(data []byte) (din *DinNet, err error) { // attention layer att0 := G.NewMatrix( g, - dt, + DT, G.WithShape(1, uBehaviorSize), G.WithValue(tensor.New(tensor.WithShape(1, uBehaviorSize), tensor.WithBacking(m.Att0))), G.WithName("att0"), ) //att1 := G.NewMatrix( // g, - // dt, + // DT, // G.WithShape(att0_1, 1), // G.WithValue(tensor.New(tensor.WithShape(att0_1, 1), tensor.WithBacking(m.Att1[i]))), // G.WithName("att1"), //) - mlp0 := G.NewMatrix(g, dt, + mlp0 := G.NewMatrix(g, DT, G.WithShape(uProfileDim+uBehaviorDim+iFeatureDim+cFeatureDim, mlp0_1), G.WithName("mlp0"), G.WithValue(tensor.New( @@ -110,13 +110,13 @@ func NewDinNetFromJson(data []byte) (din *DinNet, err error) { ), ) - mlp1 := G.NewMatrix(g, dt, + mlp1 := G.NewMatrix(g, DT, G.WithShape(mlp0_1, mlp1_2), G.WithName("mlp1"), G.WithValue(tensor.New(tensor.WithShape(mlp0_1, mlp1_2), tensor.WithBacking(m.Mlp1))), ) - mlp2 := G.NewMatrix(g, dt, + mlp2 := G.NewMatrix(g, DT, G.WithShape(mlp1_2, 1), G.WithName("mlp2"), G.WithValue(tensor.New(tensor.WithShape(mlp1_2, 1), tensor.WithBacking(m.Mlp2))), @@ -170,17 +170,17 @@ func NewDinNet( } g := G.NewGraph() // attention layer - att0 := G.NewTensor(g, dt, 2, G.WithShape(1, uBehaviorSize), G.WithName("att0"), G.WithInit(G.ValuesOf(float64(1.0/uBehaviorSize)))) - //att1 := G.NewTensor(g, dt, 3, G.WithShape(uBehaviorSize, att0_1, 1), G.WithName("att1"), G.WithInit(G.HeN(1.0))) + att0 := G.NewTensor(g, DT, 2, G.WithShape(1, uBehaviorSize), G.WithName("att0"), G.WithInit(G.ValuesOf(float32(1.0/uBehaviorSize)))) + //att1 := G.NewTensor(g, DT, 3, G.WithShape(uBehaviorSize, att0_1, 1), G.WithName("att1"), G.WithInit(G.Gaussian(0, 1.0))) // user behaviors are represented as a sequence of item embeddings. Before // being fed into the MLP, we need to flatten the sequence into a single with // sum pooling with Attention as the weights which is the key point of DIN model. - mlp0 := G.NewMatrix(g, dt, G.WithShape(uProfileDim+uBehaviorDim+iFeatureDim+cFeatureDim, mlp0_1), G.WithName("mlp0"), G.WithInit(G.HeN(1.0))) + mlp0 := G.NewMatrix(g, DT, G.WithShape(uProfileDim+uBehaviorDim+iFeatureDim+cFeatureDim, mlp0_1), G.WithName("mlp0"), G.WithInit(G.Gaussian(0, 1.0))) - mlp1 := G.NewMatrix(g, dt, G.WithShape(mlp0_1, mlp1_2), G.WithName("mlp1"), G.WithInit(G.HeN(1.0))) + mlp1 := G.NewMatrix(g, DT, G.WithShape(mlp0_1, mlp1_2), G.WithName("mlp1"), G.WithInit(G.Gaussian(0, 1.0))) - mlp2 := G.NewMatrix(g, dt, G.WithShape(mlp1_2, 1), G.WithName("mlp2"), G.WithInit(G.HeN(1.0))) + mlp2 := G.NewMatrix(g, DT, G.WithShape(mlp1_2, 1), G.WithName("mlp2"), G.WithInit(G.Gaussian(0, 1.0))) return &DinNet{ uProfileDim: uProfileDim, @@ -260,7 +260,7 @@ func (din *DinNet) Fwd(xUserProfile, xUbMatrix, xItemFeature, xCtxFeature *G.Nod nil, []byte{2}, )) - //actOuts := G.NewTensor(din.Graph(), dt, 2, G.WithShape(batchSize, uBehaviorDim), G.WithName("actOuts"), G.WithInit(G.Zeroes())) + //actOuts := G.NewTensor(din.Graph(), DT, 2, G.WithShape(batchSize, uBehaviorDim), G.WithName("actOuts"), G.WithInit(G.Zeroes())) //for i := 0; i < uBehaviorSize; i++ { // // xUserBehaviors[:, i, :], ub.shape: [batchSize, uBehaviorDim] // ub := G.Must(G.Slice(xUserBehaviors, []tensor.Slice{nil, G.S(i)}...)) @@ -290,11 +290,11 @@ func (din *DinNet) Fwd(xUserProfile, xUbMatrix, xItemFeature, xCtxFeature *G.Nod // mlp0.Shape: [userProfileDim+userBehaviorDim+itemFeatureDim+contextFeatureDim, 200] // out.Shape: [batchSize, 200] mlp0Out := G.Must(G.Sigmoid(G.Must(G.Mul(concat, din.mlp0)))) - mlp0Out = G.Must(G.Dropout(mlp0Out, din.d0)) + mlp0Out = G.Must(G.Dropout(mlp0Out, float64(din.d0))) // mlp1.Shape: [200, 80] // out.Shape: [batchSize, 80] mlp1Out := G.Must(G.Sigmoid(G.Must(G.Mul(mlp0Out, din.mlp1)))) - mlp1Out = G.Must(G.Dropout(mlp1Out, din.d1)) + mlp1Out = G.Must(G.Dropout(mlp1Out, float64(din.d1))) // mlp2.Shape: [80, 1] // out.Shape: [batchSize, 1] mlp2Out := G.Must(G.Sigmoid(G.Must(G.Mul(mlp1Out, din.mlp2)))) diff --git a/model/din/model.go b/model/din/model.go index 7a044fe..3748354 100644 --- a/model/din/model.go +++ b/model/din/model.go @@ -13,7 +13,7 @@ import ( "gorgonia.org/tensor" ) -var dt = tensor.Float64 +var DT = tensor.Float32 const ( // magic numbers for din paper @@ -41,12 +41,12 @@ func Train(uProfileDim, uBehaviorSize, uBehaviorDim, iFeatureDim, cFeatureDim in m Model, ) (err error) { g := m.Graph() - xUserProfile := G.NewMatrix(g, dt, G.WithShape(batchSize, uProfileDim), G.WithName("xUserProfile")) - //xUserBehaviors := G.NewTensor(g, dt, 3, G.WithShape(batchSize, uBehaviorSize, uBehaviorDim), G.WithName("xUserBehaviors")) - xUserBehaviorMatrix := G.NewMatrix(g, dt, G.WithShape(batchSize, uBehaviorSize*uBehaviorDim), G.WithName("xUserBehaviorMatrix")) - xItemFeature := G.NewMatrix(g, dt, G.WithShape(batchSize, iFeatureDim), G.WithName("xItemFeature")) - xCtxFeature := G.NewMatrix(g, dt, G.WithShape(batchSize, cFeatureDim), G.WithName("xCtxFeature")) - y := G.NewTensor(g, dt, 2, G.WithShape(batchSize, 1), G.WithName("y")) + xUserProfile := G.NewMatrix(g, DT, G.WithShape(batchSize, uProfileDim), G.WithName("xUserProfile")) + //xUserBehaviors := G.NewTensor(g, DT, 3, G.WithShape(batchSize, uBehaviorSize, uBehaviorDim), G.WithName("xUserBehaviors")) + xUserBehaviorMatrix := G.NewMatrix(g, DT, G.WithShape(batchSize, uBehaviorSize*uBehaviorDim), G.WithName("xUserBehaviorMatrix")) + xItemFeature := G.NewMatrix(g, DT, G.WithShape(batchSize, iFeatureDim), G.WithName("xItemFeature")) + xCtxFeature := G.NewMatrix(g, DT, G.WithShape(batchSize, cFeatureDim), G.WithName("xCtxFeature")) + y := G.NewTensor(g, DT, 2, G.WithShape(batchSize, 1), G.WithName("y")) //m := NewDinNet(g, uProfileDim, uBehaviorSize, uBehaviorDim, iFeatureDim, cFeatureDim) if err = m.Fwd(xUserProfile, xUserBehaviorMatrix, xItemFeature, xCtxFeature, batchSize, uBehaviorSize, uBehaviorDim); err != nil { log.Fatalf("%+v", err) @@ -55,8 +55,8 @@ func Train(uProfileDim, uBehaviorSize, uBehaviorDim, iFeatureDim, cFeatureDim in //losses := G.Must(G.HadamardProd(G.Must(G.Neg(G.Must(G.Log(m.out)))), y)) //losses := G.Must(G.Square(G.Must(G.Sub(m.Out(), y)))) positive := G.Must(G.HadamardProd(G.Must(G.Log(m.Out())), y)) - negative := G.Must(G.HadamardProd(G.Must(G.Log(G.Must(G.Sub(G.NewConstant(float64(1.0+1e-8)), m.Out())))), G.Must(G.Sub(G.NewConstant(float64(1.0)), y)))) - //negative := G.Must(G.Log(G.Must(G.Sub(G.NewConstant(float64(1.000000001)), m.Out())))) + negative := G.Must(G.HadamardProd(G.Must(G.Log(G.Must(G.Sub(G.NewConstant(float32(1.0+1e-8)), m.Out())))), G.Must(G.Sub(G.NewConstant(float32(1.0)), y)))) + //negative := G.Must(G.Log(G.Must(G.Sub(G.NewConstant(float32(1.000000001)), m.Out())))) cost := G.Must(G.Neg(G.Must(G.Mean(G.Must(G.Add(positive, negative)))))) // we want to track costs @@ -93,11 +93,11 @@ func Train(uProfileDim, uBehaviorSize, uBehaviorDim, iFeatureDim, cFeatureDim in ) m.SetVM(vm) - //solver := G.NewRMSPropSolver(G.WithBatchSize(float64(batchSize))) - //solver := G.NewVanillaSolver(G.WithBatchSize(float64(batchSize)), G.WithLearnRate(0.001)) - //solver := G.NewBarzilaiBorweinSolver(G.WithBatchSize(float64(batchSize)), G.WithLearnRate(0.001)) - //solver := G.NewAdaGradSolver(G.WithBatchSize(float64(batchSize)), G.WithLearnRate(0.001)) - //solver := G.NewMomentum(G.WithBatchSize(float64(batchSize)), G.WithLearnRate(0.001)) + //solver := G.NewRMSPropSolver(G.WithBatchSize(float32(batchSize))) + //solver := G.NewVanillaSolver(G.WithBatchSize(float32(batchSize)), G.WithLearnRate(0.001)) + //solver := G.NewBarzilaiBorweinSolver(G.WithBatchSize(float32(batchSize)), G.WithLearnRate(0.001)) + //solver := G.NewAdaGradSolver(G.WithBatchSize(float32(batchSize)), G.WithLearnRate(0.001)) + //solver := G.NewMomentum(G.WithBatchSize(float32(batchSize)), G.WithLearnRate(0.001)) solver := G.NewAdamSolver(G.WithLearnRate(0.01), G.WithBatchSize(float64(batchSize)), G.WithL2Reg(0.0001)) //defer func() { // vm.Close() @@ -110,7 +110,7 @@ func Train(uProfileDim, uBehaviorSize, uBehaviorDim, iFeatureDim, cFeatureDim in log.Printf("Batches %d", batches) bar := pb.New(batches) var ( - bestCost = math.MaxFloat64 + bestCost float32 = math.MaxFloat32 noImprove int ) @@ -180,7 +180,7 @@ func Train(uProfileDim, uBehaviorSize, uBehaviorDim, iFeatureDim, cFeatureDim in vm.Reset() bar.Increment() } - costVal := cost.Value().Data().(float64) + costVal := cost.Value().Data().(float32) if costVal < bestCost { bestCost = costVal noImprove = 0 @@ -202,10 +202,10 @@ func InitForwardOnlyVm(uProfileDim, uBehaviorSize, uBehaviorDim, iFeatureDim, cF m Model, ) (err error) { g := m.Graph() - xUserProfile := G.NewMatrix(g, dt, G.WithShape(batchSize, uProfileDim), G.WithName("xUserProfile")) - xUserBehaviorMatrix := G.NewMatrix(g, dt, G.WithShape(batchSize, uBehaviorSize*uBehaviorDim), G.WithName("xUserBehaviorMatrix")) - xItemFeature := G.NewMatrix(g, dt, G.WithShape(batchSize, iFeatureDim), G.WithName("xItemFeature")) - xCtxFeature := G.NewMatrix(g, dt, G.WithShape(batchSize, cFeatureDim), G.WithName("xCtxFeature")) + xUserProfile := G.NewMatrix(g, DT, G.WithShape(batchSize, uProfileDim), G.WithName("xUserProfile")) + xUserBehaviorMatrix := G.NewMatrix(g, DT, G.WithShape(batchSize, uBehaviorSize*uBehaviorDim), G.WithName("xUserBehaviorMatrix")) + xItemFeature := G.NewMatrix(g, DT, G.WithShape(batchSize, iFeatureDim), G.WithName("xItemFeature")) + xCtxFeature := G.NewMatrix(g, DT, G.WithShape(batchSize, cFeatureDim), G.WithName("xCtxFeature")) if err = m.Fwd(xUserProfile, xUserBehaviorMatrix, xItemFeature, xCtxFeature, batchSize, uBehaviorSize, uBehaviorDim); err != nil { return @@ -224,7 +224,7 @@ func InitForwardOnlyVm(uProfileDim, uBehaviorSize, uBehaviorDim, iFeatureDim, cF return } -func Predict(m Model, numExamples, batchSize int, si *rcmd.SampleInfo, inputs tensor.Tensor) (y []float64, err error) { +func Predict(m Model, numExamples, batchSize int, si *rcmd.SampleInfo, inputs tensor.Tensor) (y []float32, err error) { //input nodes inputNodes := m.In() xUserProfile := inputNodes[0] @@ -299,7 +299,7 @@ func Predict(m Model, numExamples, batchSize int, si *rcmd.SampleInfo, inputs te } //get y - yVal := outputNode.Value().Data().([]float64) + yVal := outputNode.Value().Data().([]float32) for i := 0; i < end-start; i++ { y = append(y, yVal[i]) } @@ -312,14 +312,14 @@ func Predict(m Model, numExamples, batchSize int, si *rcmd.SampleInfo, inputs te func accuracy(prediction, y []float64) float64 { var ok float64 for i := 0; i < len(prediction); i++ { - if math.Round(prediction[i]-y[i]) == 0 { + if math.Round(float64(prediction[i]-y[i])) == 0 { ok += 1.0 } } return ok / float64(len(y)) } -func rocauc(pred, y []float64) float64 { +func RocAuc(pred, y []float64) float64 { boolY := make([]float64, len(y)) for i := 0; i < len(y); i++ { if y[i] > 0.5 { @@ -333,3 +333,22 @@ func rocauc(pred, y []float64) float64 { return metrics.ROCAUCScore(yTrue, yScore, "", nil) } + +func RocAuc32(pred, y []float32) float32 { + boolY := make([]float64, len(y)) + for i := 0; i < len(y); i++ { + if y[i] > 0.5 { + boolY[i] = 1.0 + } else { + boolY[i] = 0.0 + } + } + pred64 := make([]float64, len(pred)) + for i := 0; i < len(pred); i++ { + pred64[i] = float64(pred[i]) + } + yTrue := mat.NewDense(len(y), 1, boolY) + yScore := mat.NewDense(len(pred), 1, pred64) + + return float32(metrics.ROCAUCScore(yTrue, yScore, "", nil)) +} diff --git a/model/din/model_test.go b/model/din/model_test.go index 36f973f..acb3de6 100644 --- a/model/din/model_test.go +++ b/model/din/model_test.go @@ -37,39 +37,39 @@ func TestMultiModel(t *testing.T) { } inputWidth = uProfileDim + uBehaviorSize*uBehaviorDim + iFeatureDim + cFeatureDim ) - inputSlice := make([]float64, numExamples*inputWidth) + inputSlice := make([]float32, numExamples*inputWidth) for i := 0; i < numExamples; i++ { for j := 0; j < sampleInfo.UserProfileRange[1]; j++ { - inputSlice[i*inputWidth+j] = rand.Float64() + inputSlice[i*inputWidth+j] = rand.Float32() } for j := sampleInfo.CtxFeatureRange[0]; j < sampleInfo.CtxFeatureRange[1]; j++ { - inputSlice[i*inputWidth+j] = rand.Float64() + inputSlice[i*inputWidth+j] = rand.Float32() } for j := sampleInfo.UserBehaviorRange[0] + uBehaviorDim; j < sampleInfo.UserBehaviorRange[0]+2*uBehaviorDim; j++ { - inputSlice[i*inputWidth+j] = rand.Float64() + inputSlice[i*inputWidth+j] = rand.Float32() } for j := sampleInfo.ItemFeatureRange[0]; j < sampleInfo.ItemFeatureRange[1]; j++ { - inputSlice[i*inputWidth+j] = rand.Float64() + inputSlice[i*inputWidth+j] = rand.Float32() } } //for i := 0; i < numExamples*inputWidth; i++ { - // inputSlice[i] = rand.Float64() + // inputSlice[i] = rand.Float32() //} inputs := tensor.New(tensor.WithShape(numExamples, inputWidth), tensor.WithBacking(inputSlice)) - labelSlice := make([]float64, numExamples) + labelSlice := make([]float32, numExamples) for i := 0; i < numExamples; i++ { //distance of uProfile and cFeature slice - var dist1, dist2 float64 + var dist1, dist2 float32 for j := 0; j < uProfileDim; j++ { - dist1 += math.Abs(inputSlice[i*inputWidth+sampleInfo.UserProfileRange[0]+j] - inputSlice[i*inputWidth+sampleInfo.CtxFeatureRange[0]+j]) + dist1 += float32(math.Abs(float64(inputSlice[i*inputWidth+sampleInfo.UserProfileRange[0]+j] - inputSlice[i*inputWidth+sampleInfo.CtxFeatureRange[0]+j]))) } - labelSlice[i] = dist1 / float64(uProfileDim) + labelSlice[i] = dist1 / float32(uProfileDim) //distance of 2nd uBehavior and iFeature for j := 0; j < uBehaviorDim; j++ { - dist2 += math.Abs(inputSlice[i*inputWidth+sampleInfo.UserBehaviorRange[0]+uBehaviorDim+j] - inputSlice[i*inputWidth+sampleInfo.ItemFeatureRange[0]+j]) + dist2 += float32(math.Abs(float64(inputSlice[i*inputWidth+sampleInfo.UserBehaviorRange[0]+uBehaviorDim+j] - inputSlice[i*inputWidth+sampleInfo.ItemFeatureRange[0]+j]))) } - labelSlice[i] = math.Round((labelSlice[i] + (dist2 / float64(uBehaviorDim))) * 0.6) + labelSlice[i] = float32(math.Round(float64((labelSlice[i] + (dist2 / float32(uBehaviorDim))) * 0.6))) } labels := tensor.New(tensor.WithShape(numExamples, 1), tensor.WithBacking(labelSlice)) @@ -103,7 +103,7 @@ func TestMultiModel(t *testing.T) { So(predictions, ShouldNotBeNil) So(predictions, ShouldHaveLength, testSamples) log.Debugf("predictions: %+v", predictions) - auc := rocauc(predictions, labels.Data().([]float64)[:testSamples]) + auc := RocAuc32(predictions, labels.Data().([]float32)[:testSamples]) log.Debugf("auc: %f", auc) So(auc, ShouldBeGreaterThan, 0.5) }) @@ -136,7 +136,7 @@ func TestMultiModel(t *testing.T) { So(predictions, ShouldNotBeNil) So(predictions, ShouldHaveLength, testSamples) log.Debugf("predictions: %+v", predictions) - auc := rocauc(predictions, labels.Data().([]float64)[:testSamples]) + auc := RocAuc32(predictions, labels.Data().([]float32)[:testSamples]) log.Debugf("auc: %f", auc) So(auc, ShouldBeGreaterThan, 0.5) }) diff --git a/model/din/simplemlp.go b/model/din/simplemlp.go index c993e33..fa8c40d 100644 --- a/model/din/simplemlp.go +++ b/model/din/simplemlp.go @@ -19,7 +19,7 @@ type SimpleMLP struct { xUserProfile, xUbMatrix, xItemFeature, xCtxFeature *G.Node //learnable nodes mlp0, mlp1, mlp2 *G.Node - d0, d1 float64 // dropout probabilities + d0, d1 float32 // dropout probabilities out *G.Node } @@ -33,9 +33,9 @@ type mlpModel struct { UBehaviorDim int `json:"uBehaviorDim"` IFeatureDim int `json:"iFeatureDim"` CFeatureDim int `json:"cFeatureDim"` - Mlp0 []float64 `json:"mlp0"` - Mlp1 []float64 `json:"mlp1"` - Mlp2 []float64 `json:"mlp2"` + Mlp0 []float32 `json:"mlp0"` + Mlp1 []float32 `json:"mlp1"` + Mlp2 []float32 `json:"mlp2"` } func (mlp *SimpleMLP) Marshal() (data []byte, err error) { @@ -45,9 +45,9 @@ func (mlp *SimpleMLP) Marshal() (data []byte, err error) { UBehaviorDim: mlp.uBehaviorDim, IFeatureDim: mlp.iFeatureDim, CFeatureDim: mlp.cFeatureDim, - Mlp0: mlp.mlp0.Value().Data().([]float64), - Mlp1: mlp.mlp1.Value().Data().([]float64), - Mlp2: mlp.mlp2.Value().Data().([]float64), + Mlp0: mlp.mlp0.Value().Data().([]float32), + Mlp1: mlp.mlp1.Value().Data().([]float32), + Mlp2: mlp.mlp2.Value().Data().([]float32), } return json.Marshal(model) } @@ -67,19 +67,19 @@ func NewSimpleMLPFromJson(data []byte) (mlp *SimpleMLP, err error) { mlp0_0 = uProfileDim + uBehaviorSize*uBehaviorDim + iFeatureDim + cFeatureDim ) - mlp0 := G.NewMatrix(g, dt, + mlp0 := G.NewMatrix(g, DT, G.WithShape(mlp0_0, mlp0_1), G.WithName("mlp0"), G.WithValue(tensor.New(tensor.WithShape(mlp0_0, mlp0_1), tensor.WithBacking(m.Mlp0))), ) - mlp1 := G.NewMatrix(g, dt, + mlp1 := G.NewMatrix(g, DT, G.WithShape(mlp0_1, mlp1_2), G.WithName("mlp1"), G.WithValue(tensor.New(tensor.WithShape(mlp0_1, mlp1_2), tensor.WithBacking(m.Mlp1))), ) - mlp2 := G.NewMatrix(g, dt, + mlp2 := G.NewMatrix(g, DT, G.WithShape(mlp1_2, 1), G.WithName("mlp2"), G.WithValue(tensor.New(tensor.WithShape(mlp1_2, 1), tensor.WithBacking(m.Mlp2))), @@ -114,9 +114,9 @@ func NewSimpleMLP( cFeatureDim int, ) (mlp *SimpleMLP) { g := G.NewGraph() - mlp0 := G.NewMatrix(g, G.Float64, G.WithShape(uProfileDim+uBehaviorSize*uBehaviorDim+iFeatureDim+cFeatureDim, mlp0_1), G.WithName("mlp0"), G.WithInit(G.HeN(1.0))) - mlp1 := G.NewMatrix(g, G.Float64, G.WithShape(mlp0_1, mlp1_2), G.WithName("mlp1"), G.WithInit(G.HeN(1.0))) - mlp2 := G.NewMatrix(g, G.Float64, G.WithShape(mlp1_2, 1), G.WithName("mlp2"), G.WithInit(G.HeN(1.0))) + mlp0 := G.NewMatrix(g, G.Float32, G.WithShape(uProfileDim+uBehaviorSize*uBehaviorDim+iFeatureDim+cFeatureDim, mlp0_1), G.WithName("mlp0"), G.WithInit(G.Gaussian(0, 1.0))) + mlp1 := G.NewMatrix(g, G.Float32, G.WithShape(mlp0_1, mlp1_2), G.WithName("mlp1"), G.WithInit(G.Gaussian(0, 1.0))) + mlp2 := G.NewMatrix(g, G.Float32, G.WithShape(mlp1_2, 1), G.WithName("mlp2"), G.WithInit(G.Gaussian(0, 1.0))) return &SimpleMLP{ uProfileDim: uProfileDim, uBehaviorSize: uBehaviorSize, @@ -154,9 +154,9 @@ func (mlp *SimpleMLP) Fwd(xUserProfile, ubMatrix, xItemFeature, xCtxFeature *G.N x := G.Must(G.Concat(1, xUserProfile, xUserBehaviors, xItemFeature, xCtxFeature)) // mlp mlp0Out := G.Must(G.Sigmoid(G.Must(G.Mul(x, mlp.mlp0)))) - mlp0Out = G.Must(G.Dropout(mlp0Out, mlp.d0)) + mlp0Out = G.Must(G.Dropout(mlp0Out, float64(mlp.d0))) mlp1Out := G.Must(G.Sigmoid(G.Must(G.Mul(mlp0Out, mlp.mlp1)))) - mlp1Out = G.Must(G.Dropout(mlp1Out, mlp.d1)) + mlp1Out = G.Must(G.Dropout(mlp1Out, float64(mlp.d1))) mlp.out = G.Must(G.Sigmoid(G.Must(G.Mul(mlp1Out, mlp.mlp2)))) mlp.xUserProfile = xUserProfile diff --git a/ps/batch.go b/ps/batch.go deleted file mode 100644 index 91d5e64..0000000 --- a/ps/batch.go +++ /dev/null @@ -1,187 +0,0 @@ -package ps - -import ( - "sync" - "time" - - "github.com/auxten/edgeRec/nn" -) - -// BatchTrainer implements parallelized batch training -type BatchTrainer struct { - *internalb - verbosity int - batchSize int - parallelism int - solver Solver - printer *StatsPrinter -} - -type internalb struct { - deltas [][][]float64 - partialDeltas [][][][]float64 - accumulatedDeltas [][][]float64 - moments [][][]float64 -} - -func newBatchTraining(layers []*nn.Layer, parallelism int) *internalb { - deltas := make([][][]float64, parallelism) - partialDeltas := make([][][][]float64, parallelism) - accumulatedDeltas := make([][][]float64, len(layers)) - for w := 0; w < parallelism; w++ { - deltas[w] = make([][]float64, len(layers)) - partialDeltas[w] = make([][][]float64, len(layers)) - - for i, l := range layers { - deltas[w][i] = make([]float64, len(l.Neurons)) - accumulatedDeltas[i] = make([][]float64, len(l.Neurons)) - partialDeltas[w][i] = make([][]float64, len(l.Neurons)) - for j, n := range l.Neurons { - partialDeltas[w][i][j] = make([]float64, len(n.In)) - accumulatedDeltas[i][j] = make([]float64, len(n.In)) - } - } - } - return &internalb{ - deltas: deltas, - partialDeltas: partialDeltas, - accumulatedDeltas: accumulatedDeltas, - } -} - -// NewBatchTrainer returns a BatchTrainer -func NewBatchTrainer(solver Solver, verbosity, batchSize, parallelism int) *BatchTrainer { - return &BatchTrainer{ - solver: solver, - verbosity: verbosity, - batchSize: iparam(batchSize, 1), - parallelism: iparam(parallelism, 1), - printer: NewStatsPrinter(), - } -} - -// Train trains n -func (t *BatchTrainer) Train(n *nn.Neural, examples, validation Samples, iterations int, shuffle bool) { - t.internalb = newBatchTraining(n.Layers, t.parallelism) - - train := make(Samples, len(examples)) - copy(train, examples) - - workCh := make(chan Sample, t.parallelism) - nets := make([]*nn.Neural, t.parallelism) - - wg := sync.WaitGroup{} - for i := 0; i < t.parallelism; i++ { - nets[i] = nn.NewNeural(n.Config) - - go func(id int, workCh <-chan Sample) { - n := nets[id] - for e := range workCh { - n.Forward(e.Input) - t.calculateDeltas(n, e.Response, id) - wg.Done() - } - }(i, workCh) - } - - t.printer.Init(n) - t.solver.Init(n.NumWeights()) - - ts := time.Now() - for it := 1; it <= iterations; it++ { - if shuffle { - train.Shuffle() - } - batches := train.SplitSize(t.batchSize) - - for _, b := range batches { - currentWeights := n.Weights() - for _, n := range nets { - n.ApplyWeights(currentWeights) - } - - wg.Add(len(b)) - for _, item := range b { - workCh <- item - } - wg.Wait() - - for _, wPD := range t.partialDeltas { - for i, iPD := range wPD { - iAD := t.accumulatedDeltas[i] - for j, jPD := range iPD { - jAD := iAD[j] - for k, v := range jPD { - jAD[k] += v - jPD[k] = 0 - } - } - } - } - - t.update(n, it) - } - - if t.verbosity > 0 && it%t.verbosity == 0 && len(validation) > 0 { - t.printer.PrintProgress(n, validation, time.Since(ts), it) - } - } -} - -func (t *BatchTrainer) calculateDeltas(n *nn.Neural, ideal []float64, wid int) { - loss := nn.GetLoss(n.Config.Loss) - deltas := t.deltas[wid] - partialDeltas := t.partialDeltas[wid] - lastDeltas := deltas[len(n.Layers)-1] - - for i, n := range n.Layers[len(n.Layers)-1].Neurons { - lastDeltas[i] = loss.Df( - n.Value, - ideal[i], - n.DActivate(n.Value)) - } - - for i := len(n.Layers) - 2; i >= 0; i-- { - l := n.Layers[i] - iD := deltas[i] - nextD := deltas[i+1] - for j, n := range l.Neurons { - var sum float64 - for k, s := range n.Out { - sum += s.Weight * nextD[k] - } - iD[j] = n.DActivate(n.Value) * sum - } - } - - for i, l := range n.Layers { - iD := deltas[i] - iPD := partialDeltas[i] - for j, n := range l.Neurons { - jD := iD[j] - jPD := iPD[j] - for k, s := range n.In { - jPD[k] += jD * s.In - } - } - } -} - -func (t *BatchTrainer) update(n *nn.Neural, it int) { - var idx int - for i, l := range n.Layers { - iAD := t.accumulatedDeltas[i] - for j, n := range l.Neurons { - jAD := iAD[j] - for k, s := range n.In { - update := t.solver.Update(s.Weight, - jAD[k], - it, - idx) - s.Weight += update - jAD[k] = 0 - idx++ - } - } - } -} diff --git a/ps/batch_test.go b/ps/batch_test.go deleted file mode 100644 index 7c11a79..0000000 --- a/ps/batch_test.go +++ /dev/null @@ -1,39 +0,0 @@ -package ps - -import ( - "math/rand" - "runtime" - "testing" - - "github.com/auxten/edgeRec/nn" -) - -func Benchmark_xor(b *testing.B) { - rand.Seed(0) - n := nn.NewNeural(&nn.Config{ - Inputs: 2, - Layout: []int{32, 32, 1}, - Activation: nn.ActivationSigmoid, - Mode: nn.ModeBinary, - Weight: nn.NewUniform(.25, 0), - Bias: true, - }) - exs := Samples{ - {[]float64{0, 0}, []float64{0}}, - {[]float64{1, 0}, []float64{1}}, - {[]float64{0, 1}, []float64{1}}, - {[]float64{1, 1}, []float64{0}}, - } - const minSamples = 4000 - var dupExs Samples - for len(dupExs) < minSamples { - dupExs = append(dupExs, exs...) - } - - for i := 0; i < b.N; i++ { - const iterations = 20 - solver := NewAdam(0.001, 0.9, 0.999, 1e-8) - trainer := NewBatchTrainer(solver, iterations, len(dupExs)/2, runtime.NumCPU()) - trainer.Train(n, dupExs, dupExs, iterations, true) - } -} diff --git a/ps/printer.go b/ps/printer.go deleted file mode 100644 index ccae573..0000000 --- a/ps/printer.go +++ /dev/null @@ -1,68 +0,0 @@ -package ps - -import ( - "fmt" - "os" - "text/tabwriter" - "time" - - "github.com/auxten/edgeRec/nn" -) - -// StatsPrinter prints training progress -type StatsPrinter struct { - w *tabwriter.Writer -} - -// NewStatsPrinter creates a StatsPrinter -func NewStatsPrinter() *StatsPrinter { - return &StatsPrinter{tabwriter.NewWriter(os.Stdout, 16, 0, 3, ' ', 0)} -} - -// Init initializes printer -func (p *StatsPrinter) Init(n *nn.Neural) { - fmt.Fprintf(p.w, "Epochs\tElapsed\tLoss (%s)\t", n.Config.Loss) - if n.Config.Mode == nn.ModeMultiClass { - fmt.Fprintf(p.w, "Accuracy\t\n---\t---\t---\t---\t\n") - } else { - fmt.Fprintf(p.w, "\n---\t---\t---\t\n") - } -} - -// PrintProgress prints the current state of training -func (p *StatsPrinter) PrintProgress(n *nn.Neural, validation Samples, elapsed time.Duration, iteration int) { - fmt.Fprintf(p.w, "%d\t%s\t%.4f\t%s\n", - iteration, - elapsed.String(), - crossValidate(n, validation), - formatAccuracy(n, validation)) - p.w.Flush() -} - -func formatAccuracy(n *nn.Neural, validation Samples) string { - if n.Config.Mode == nn.ModeMultiClass { - return fmt.Sprintf("%.2f\t", accuracy(n, validation)) - } - return "" -} - -func accuracy(n *nn.Neural, validation Samples) float64 { - correct := 0 - for _, e := range validation { - est := n.Predict(e.Input) - if nn.ArgMax(e.Response) == nn.ArgMax(est) { - correct++ - } - } - return float64(correct) / float64(len(validation)) -} - -func crossValidate(n *nn.Neural, validation Samples) float64 { - predictions, responses := make([][]float64, len(validation)), make([][]float64, len(validation)) - for i := 0; i < len(validation); i++ { - predictions[i] = n.Predict(validation[i].Input) - responses[i] = validation[i].Response - } - - return nn.GetLoss(n.Config.Loss).F(predictions, responses) -} diff --git a/ps/sample.go b/ps/sample.go deleted file mode 100644 index 8a57fdb..0000000 --- a/ps/sample.go +++ /dev/null @@ -1,58 +0,0 @@ -package ps - -import "math/rand" - -// Sample is an input-target pair -type Sample struct { - Input []float64 - Response []float64 -} - -// Samples is a set of input-output pairs -type Samples []Sample - -// Shuffle shuffles slice in-place -func (e Samples) Shuffle() { - for i := range e { - j := rand.Intn(i + 1) - e[i], e[j] = e[j], e[i] - } -} - -// Split assigns each element to two new slices -// according to probability p -func (e Samples) Split(p float64) (first, second Samples) { - for i := 0; i < len(e); i++ { - if p > rand.Float64() { - first = append(first, e[i]) - } else { - second = append(second, e[i]) - } - } - return -} - -// SplitSize splits slice into parts of size size -func (e Samples) SplitSize(size int) []Samples { - res := make([]Samples, 0) - for i := 0; i < len(e); i += size { - res = append(res, e[i:min(i+size, len(e))]) - } - return res -} - -// SplitN splits slice into n parts -func (e Samples) SplitN(n int) []Samples { - res := make([]Samples, n) - for i, el := range e { - res[i%n] = append(res[i%n], el) - } - return res -} - -func min(a, b int) int { - if a <= b { - return a - } - return b -} diff --git a/ps/sample_test.go b/ps/sample_test.go deleted file mode 100644 index 57255d2..0000000 --- a/ps/sample_test.go +++ /dev/null @@ -1,39 +0,0 @@ -package ps - -import ( - "math/rand" - "testing" - - "github.com/stretchr/testify/assert" -) - -func Test_SplitSize(t *testing.T) { - e := make(Samples, 10) - - batches := e.SplitSize(2) - assert.Len(t, batches, 5) - for _, batch := range batches { - assert.Equal(t, 2, len(batch)) - } -} - -func Test_SplitN(t *testing.T) { - e := make(Samples, 10) - - partitions := e.SplitN(3) - assert.Len(t, partitions, 3) - assert.Len(t, partitions[0], 4) - assert.Len(t, partitions[1], 3) - assert.Len(t, partitions[2], 3) -} - -func Test_Split(t *testing.T) { - rand.Seed(0) - - e := make(Samples, 100) - - a, b := e.Split(0.5) - - assert.InEpsilon(t, len(a), 50, 0.1) - assert.InEpsilon(t, len(b), 50, 0.1) -} diff --git a/ps/solver.go b/ps/solver.go deleted file mode 100644 index 42674e5..0000000 --- a/ps/solver.go +++ /dev/null @@ -1,95 +0,0 @@ -package ps - -import "math" - -// Solver implements an update rule for training a NN -type Solver interface { - Init(size int) - Update(value, gradient float64, iteration, idx int) float64 -} - -// SGD is stochastic gradient descent with nesterov/momentum -type SGD struct { - lr float64 - decay float64 - momentum float64 - nesterov bool - moments []float64 -} - -// NewSGD returns a new SGD solver -func NewSGD(lr, momentum, decay float64, nesterov bool) *SGD { - return &SGD{ - lr: fparam(lr, 0.01), - decay: decay, - momentum: momentum, - nesterov: nesterov, - } -} - -// Init initializes vectors using number of weights in network -func (o *SGD) Init(size int) { - o.moments = make([]float64, size) -} - -// Update returns the update for a given weight -func (o *SGD) Update(value, gradient float64, iteration, idx int) float64 { - lr := o.lr / (1 + o.decay*float64(iteration)) - - o.moments[idx] = o.momentum*o.moments[idx] - lr*gradient - - if o.nesterov { - o.moments[idx] = o.momentum*o.moments[idx] - lr*gradient - } - - return o.moments[idx] -} - -// Adam is an Adam solver -type Adam struct { - lr float64 - beta float64 - beta2 float64 - epsilon float64 - - v, m []float64 -} - -// NewAdam returns a new Adam solver -func NewAdam(lr, beta, beta2, epsilon float64) *Adam { - return &Adam{ - lr: fparam(lr, 0.001), - beta: fparam(beta, 0.9), - beta2: fparam(beta2, 0.999), - epsilon: fparam(epsilon, 1e-8), - } -} - -// Init initializes vectors using number of weights in network -func (o *Adam) Init(size int) { - o.v, o.m = make([]float64, size), make([]float64, size) -} - -// Update returns the update for a given weight -func (o *Adam) Update(value, gradient float64, t, idx int) float64 { - lrt := o.lr * (math.Sqrt(1.0 - math.Pow(o.beta2, float64(t)))) / - (1.0 - math.Pow(o.beta, float64(t))) - o.m[idx] = o.beta*o.m[idx] + (1.0-o.beta)*gradient - o.v[idx] = o.beta2*o.v[idx] + (1.0-o.beta2)*math.Pow(gradient, 2.0) - - return -lrt * (o.m[idx] / (math.Sqrt(o.v[idx]) + o.epsilon)) -} - -func fparam(val, fallback float64) float64 { - if val == 0.0 { - return fallback - } - return val -} - -func iparam(val, fallback int) int { - if val == 0 { - return fallback - } - return val -} diff --git a/ps/trainer.go b/ps/trainer.go deleted file mode 100644 index a1cbb25..0000000 --- a/ps/trainer.go +++ /dev/null @@ -1,109 +0,0 @@ -package ps - -import ( - "time" - - "github.com/auxten/edgeRec/nn" -) - -// Trainer is a neural network trainer -type Trainer interface { - Train(n *nn.Neural, examples, validation Samples, iterations int, shuffle bool) -} - -// OnlineTrainer is a basic, online network trainer -type OnlineTrainer struct { - *internal - solver Solver - printer *StatsPrinter - verbosity int -} - -// NewTrainer creates a new trainer -func NewTrainer(solver Solver, verbosity int) *OnlineTrainer { - return &OnlineTrainer{ - solver: solver, - printer: NewStatsPrinter(), - verbosity: verbosity, - } -} - -type internal struct { - deltas [][]float64 -} - -func newTraining(layers []*nn.Layer) *internal { - deltas := make([][]float64, len(layers)) - for i, l := range layers { - deltas[i] = make([]float64, len(l.Neurons)) - } - return &internal{ - deltas: deltas, - } -} - -// Train trains n -func (t *OnlineTrainer) Train(n *nn.Neural, examples, validation Samples, iterations int, shuffle bool) { - t.internal = newTraining(n.Layers) - - t.printer.Init(n) - t.solver.Init(n.NumWeights()) - - ts := time.Now() - for i := 1; i <= iterations; i++ { - if shuffle { - examples.Shuffle() - } - for j := 0; j < len(examples); j++ { - t.learn(n, examples[j], i) - } - if t.verbosity > 0 && i%t.verbosity == 0 && len(validation) > 0 { - t.printer.PrintProgress(n, validation, time.Since(ts), i) - } - } -} - -func (t *OnlineTrainer) learn(n *nn.Neural, e Sample, it int) { - n.Forward(e.Input) - t.calculateDeltas(n, e.Response) - t.update(n, it) -} - -func (t *OnlineTrainer) Predict(n *nn.Neural, input []float64) []float64 { - return n.Predict(input) -} - -func (t *OnlineTrainer) calculateDeltas(n *nn.Neural, ideal []float64) { - for i, neuron := range n.Layers[len(n.Layers)-1].Neurons { - t.deltas[len(n.Layers)-1][i] = nn.GetLoss(n.Config.Loss).Df( - neuron.Value, - ideal[i], - neuron.DActivate(neuron.Value)) - } - - for i := len(n.Layers) - 2; i >= 0; i-- { - for j, neuron := range n.Layers[i].Neurons { - var sum float64 - for k, s := range neuron.Out { - sum += s.Weight * t.deltas[i+1][k] - } - t.deltas[i][j] = neuron.DActivate(neuron.Value) * sum - } - } -} - -func (t *OnlineTrainer) update(n *nn.Neural, it int) { - var idx int - for i, l := range n.Layers { - for j := range l.Neurons { - for k := range l.Neurons[j].In { - update := t.solver.Update(l.Neurons[j].In[k].Weight, - t.deltas[i][j]*l.Neurons[j].In[k].In, - it, - idx) - l.Neurons[j].In[k].Weight += update - idx++ - } - } - } -} diff --git a/ps/trainer_test.go b/ps/trainer_test.go deleted file mode 100644 index 0bdce69..0000000 --- a/ps/trainer_test.go +++ /dev/null @@ -1,244 +0,0 @@ -package ps - -import ( - "fmt" - "math" - "math/rand" - "testing" - - "github.com/auxten/edgeRec/nn" - "github.com/stretchr/testify/assert" -) - -func Test_BoundedRegression(t *testing.T) { - rand.Seed(0) - - funcs := []func(float64) float64{ - math.Sin, - func(x float64) float64 { return math.Pow(x, 2) }, - math.Sqrt, - } - - for _, f := range funcs { - - data := Samples{} - for i := 0.0; i < 1; i += 0.01 { - data = append(data, Sample{Input: []float64{i}, Response: []float64{f(i)}}) - } - n := nn.NewNeural(&nn.Config{ - Inputs: 1, - Layout: []int{4, 4, 1}, - Activation: nn.ActivationTanh, - Mode: nn.ModeRegression, - Weight: nn.NewUniform(0.5, 0), - Bias: true, - }) - - trainer := NewTrainer(NewSGD(0.25, 0.5, 0, false), 0) - trainer.Train(n, data, nil, 5000, true) - - tests := []float64{0.0, 0.1, 0.25, 0.5, 0.75, 0.9} - for _, x := range tests { - assert.InEpsilon(t, f(x)+1, n.Predict([]float64{x})[0]+1, 0.1) - } - } -} - -func Test_RegressionLinearOuts(t *testing.T) { - rand.Seed(0) - squares := Samples{} - for i := 0.0; i < 100.0; i++ { - squares = append(squares, Sample{Input: []float64{i}, Response: []float64{math.Sqrt(i)}}) - } - squares.Shuffle() - n := nn.NewNeural(&nn.Config{ - Inputs: 1, - Layout: []int{3, 3, 1}, - Activation: nn.ActivationReLU, - Mode: nn.ModeRegression, - Weight: nn.NewNormal(0.5, 0.5), - Bias: true, - }) - - trainer := NewBatchTrainer(NewAdam(0.01, 0, 0, 0), 0, 25, 2) - trainer.Train(n, squares, nil, 25000, true) - - for i := 0; i < 100; i++ { - x := float64(rand.Intn(99) + 1) - assert.InEpsilon(t, math.Sqrt(x)+1, n.Predict([]float64{x})[0]+1, 0.1) - } -} - -func Test_Training(t *testing.T) { - rand.Seed(0) - - data := Samples{ - Sample{[]float64{0}, []float64{0}}, - Sample{[]float64{0}, []float64{0}}, - Sample{[]float64{0}, []float64{0}}, - Sample{[]float64{5}, []float64{1}}, - Sample{[]float64{5}, []float64{1}}, - } - - n := nn.NewNeural(&nn.Config{ - Inputs: 1, - Layout: []int{5, 1}, - Activation: nn.ActivationSigmoid, - Weight: nn.NewUniform(0.5, 0), - Bias: true, - }) - - trainer := NewTrainer(NewSGD(0.5, 0.1, 0, false), 0) - trainer.Train(n, data, nil, 1000, true) - - v := n.Predict([]float64{0}) - assert.InEpsilon(t, 1, 1+v[0], 0.1) - v = n.Predict([]float64{5}) - assert.InEpsilon(t, 1.0, v[0], 0.1) -} - -var data = []Sample{ - {[]float64{2.7810836, 2.550537003}, []float64{0}}, - {[]float64{1.465489372, 2.362125076}, []float64{0}}, - {[]float64{3.396561688, 4.400293529}, []float64{0}}, - {[]float64{1.38807019, 1.850220317}, []float64{0}}, - {[]float64{3.06407232, 3.005305973}, []float64{0}}, - {[]float64{7.627531214, 2.759262235}, []float64{1}}, - {[]float64{5.332441248, 2.088626775}, []float64{1}}, - {[]float64{6.922596716, 1.77106367}, []float64{1}}, - {[]float64{8.675418651, -0.242068655}, []float64{1}}, - {[]float64{7.673756466, 3.508563011}, []float64{1}}, -} - -func Test_Prediction(t *testing.T) { - rand.Seed(0) - - n := nn.NewNeural(&nn.Config{ - Inputs: 2, - Layout: []int{2, 2, 1}, - Activation: nn.ActivationSigmoid, - Weight: nn.NewUniform(0.5, 0), - Bias: true, - }) - trainer := NewTrainer(NewSGD(0.5, 0.1, 0, false), 0) - - trainer.Train(n, data, nil, 5000, true) - - for _, d := range data { - assert.InEpsilon(t, trainer.Predict(n, d.Input)[0]+1, d.Response[0]+1, 0.1) - } -} - -func Test_CrossVal(t *testing.T) { - n := nn.NewNeural(&nn.Config{ - Inputs: 2, - Layout: []int{1, 1}, - Activation: nn.ActivationTanh, - Loss: nn.LossMeanSquared, - Weight: nn.NewUniform(0.5, 0), - Bias: true, - }) - - trainer := NewTrainer(NewSGD(0.5, 0.1, 0, false), 0) - trainer.Train(n, data, data, 1000, true) - - for _, d := range data { - assert.InEpsilon(t, n.Predict(d.Input)[0]+1, d.Response[0]+1, 0.1) - assert.InEpsilon(t, 1, crossValidate(n, data)+1, 0.01) - } -} - -func Test_MultiClass(t *testing.T) { - var data = []Sample{ - {[]float64{2.7810836, 2.550537003}, []float64{1, 0}}, - {[]float64{1.465489372, 2.362125076}, []float64{1, 0}}, - {[]float64{3.396561688, 4.400293529}, []float64{1, 0}}, - {[]float64{1.38807019, 1.850220317}, []float64{1, 0}}, - {[]float64{3.06407232, 3.005305973}, []float64{1, 0}}, - {[]float64{7.627531214, 2.759262235}, []float64{0, 1}}, - {[]float64{5.332441248, 2.088626775}, []float64{0, 1}}, - {[]float64{6.922596716, 1.77106367}, []float64{0, 1}}, - {[]float64{8.675418651, -0.242068655}, []float64{0, 1}}, - {[]float64{7.673756466, 3.508563011}, []float64{0, 1}}, - } - - n := nn.NewNeural(&nn.Config{ - Inputs: 2, - Layout: []int{2, 2}, - Activation: nn.ActivationReLU, - Mode: nn.ModeMultiClass, - Loss: nn.LossMeanSquared, - Weight: nn.NewUniform(0.1, 0), - Bias: true, - }) - - trainer := NewTrainer(NewSGD(0.01, 0.1, 0, false), 0) - trainer.Train(n, data, data, 1000, true) - - for _, d := range data { - est := n.Predict(d.Input) - assert.InEpsilon(t, 1.0, nn.Sum(est), 0.00001) - if d.Response[0] == 1.0 { - assert.InEpsilon(t, n.Predict(d.Input)[0]+1, d.Response[0]+1, 0.1) - } else { - assert.InEpsilon(t, n.Predict(d.Input)[1]+1, d.Response[1]+1, 0.1) - } - assert.InEpsilon(t, 1, crossValidate(n, data)+1, 0.01) - } - -} - -func Test_or(t *testing.T) { - rand.Seed(0) - n := nn.NewNeural(&nn.Config{ - Inputs: 2, - Layout: []int{1, 1}, - Activation: nn.ActivationTanh, - Mode: nn.ModeBinary, - Weight: nn.NewUniform(0.5, 0), - Bias: true, - }) - permutations := Samples{ - {[]float64{0, 0}, []float64{0}}, - {[]float64{1, 0}, []float64{1}}, - {[]float64{0, 1}, []float64{1}}, - {[]float64{1, 1}, []float64{1}}, - } - - trainer := NewTrainer(NewSGD(0.5, 0, 0, false), 10) - - trainer.Train(n, permutations, permutations, 25, true) - - for _, perm := range permutations { - assert.Equal(t, nn.Round(n.Predict(perm.Input)[0]), perm.Response[0]) - } -} - -func Test_xor(t *testing.T) { - rand.Seed(0) - n := nn.NewNeural(&nn.Config{ - Inputs: 2, - Layout: []int{3, 1}, // Sufficient for modeling (AND+OR) - with 5-6 neuron always converges - Activation: nn.ActivationSigmoid, - Mode: nn.ModeBinary, - Weight: nn.NewUniform(.25, 0), - Bias: true, - }) - permutations := Samples{ - {[]float64{0, 0}, []float64{0}}, - {[]float64{1, 0}, []float64{1}}, - {[]float64{0, 1}, []float64{1}}, - {[]float64{1, 1}, []float64{0}}, - } - - trainer := NewTrainer(NewSGD(1.0, 0.1, 1e-6, false), 50) - trainer.Train(n, permutations, permutations, 500, true) - - for _, perm := range permutations { - assert.InEpsilon(t, n.Predict(perm.Input)[0]+1, perm.Response[0]+1, 0.2) - } -} - -func printResult(ideal, actual []float64) { - fmt.Printf("want: %+v have: %+v\n", ideal, actual) -} diff --git a/recommend/rcmd.go b/recommend/rcmd.go index e39e7b6..09eeaf8 100644 --- a/recommend/rcmd.go +++ b/recommend/rcmd.go @@ -10,11 +10,10 @@ import ( "github.com/auxten/edgeRec/feature/embedding" "github.com/auxten/edgeRec/feature/embedding/model" "github.com/auxten/edgeRec/feature/embedding/model/word2vec" - "github.com/auxten/edgeRec/ps" "github.com/auxten/edgeRec/utils" "github.com/karlseguin/ccache/v2" log "github.com/sirupsen/logrus" - "gonum.org/v1/gonum/mat" + "gorgonia.org/tensor" ) const ( @@ -30,7 +29,7 @@ const ( var ( itemEmbeddingModel model.Model - itemEmbeddingMap word2vec.EmbeddingMap + itemEmbeddingMap word2vec.EmbeddingMap32 //TODO: maybe a switch to control whether to reuse training cache when predict UserFeatureCache *ccache.Cache ItemFeatureCache *ccache.Cache @@ -38,14 +37,14 @@ var ( // DefaultUserFeature and DefaultItemFeature are backup if not nil //when user or item missing in database, use this to fill - DefaultUserFeature []float64 - DefaultItemFeature []float64 + DefaultUserFeature []float32 + DefaultItemFeature []float32 DebugUserId int DebugItemId int ) -type Tensor []float64 +type Tensor []float32 type Stage int @@ -55,13 +54,17 @@ const ( ) type TrainSample struct { - Data ps.Samples + X []float32 + Y []float32 + Rows int + XCols int + Info SampleInfo } type sampleVec struct { - vec []float64 - label float64 + vec []float32 + label float32 iWidth int uWidth int } @@ -82,7 +85,7 @@ type BasicFeatureProvider interface { } type PredictAbstract interface { - Predict(X mat.Matrix, Y mat.Mutable) *mat.Dense + Predict(X tensor.Tensor) tensor.Tensor } type Trainer interface { @@ -180,13 +183,13 @@ type PreTrainer interface { type ItemScore struct { ItemId int `json:"itemId"` - Score float64 `json:"score"` + Score float32 `json:"score"` } type Sample struct { UserId int `json:"userId"` ItemId int `json:"itemId"` - Label float64 `json:"label"` + Label float32 `json:"label"` Timestamp int64 `json:"timestamp"` } @@ -207,7 +210,7 @@ func Train(ctx context.Context, recSys RecSys, mlp Fitter) (model Predictor, err log.Errorf("get item embedding model error: %v", err) return } - itemEmbeddingMap, err = itemEmbeddingModel.GenEmbeddingMap() + itemEmbeddingMap, err = itemEmbeddingModel.GenEmbeddingMap32() if err != nil { log.Errorf("get item embedding map error: %v", err) return @@ -215,10 +218,13 @@ func Train(ctx context.Context, recSys RecSys, mlp Fitter) (model Predictor, err } trainSample, err := GetSample(recSys, ctx) - sampleLen := len(trainSample.Data) + if err != nil { + log.Errorf("get train sample error: %v", err) + return + } // start training - log.Infof("\nstart training with %d samples\n", sampleLen) + log.Infof("\nstart training with %d x %d samples\n", trainSample.Rows, trainSample.XCols) pred, err := mlp.Fit(trainSample) if err != nil { @@ -253,17 +259,22 @@ func Rank(ctx context.Context, recSys Predictor, userId int, itemIds []int) (ite return } itemScores = make([]ItemScore, len(itemIds)) + var score interface{} for i, itemId := range itemIds { + if score, err = y.At(i, 0); err != nil { + itemScores = nil + return + } itemScores[i] = ItemScore{ ItemId: itemId, - Score: y.At(i, 0), + Score: score.(float32), } } return } -func BatchPredict(ctx context.Context, recSys Predictor, sampleKeys []Sample) (y *mat.Dense, err error) { +func BatchPredict(ctx context.Context, recSys Predictor, sampleKeys []Sample) (y tensor.Tensor, err error) { ctx = context.WithValue(ctx, StageKey, PredictStage) if preRanker, ok := recSys.(PreRanker); ok { err = preRanker.PreRank(ctx) @@ -273,16 +284,16 @@ func BatchPredict(ctx context.Context, recSys Predictor, sampleKeys []Sample) (y } } - y = mat.NewDense(len(sampleKeys), 1, nil) var ( - x *mat.Dense - zeroSliceX []float64 + xData []float32 + xWidth int + zeroSliceX []float32 debugIds = make([]int, 0) ) for i, sKey := range sampleKeys { var ( - xSlice []float64 + xSlice []float32 ) xSlice, _, _, err = GetSampleVector(ctx, UserFeatureCache, ItemFeatureCache, recSys, &sKey) if err != nil { @@ -290,37 +301,43 @@ func BatchPredict(ctx context.Context, recSys Predictor, sampleKeys []Sample) (y log.Errorf("get sample vector error: %v", err) return } else { - _, col := x.Dims() - zeroSliceX = make([]float64, col) + zeroSliceX = make([]float32, xWidth) xSlice = zeroSliceX } } if i == 0 { - x = mat.NewDense(len(sampleKeys), len(xSlice), nil) + xWidth = len(xSlice) + xData = make([]float32, len(sampleKeys)*xWidth) } - _, xCol := x.Dims() - if len(xSlice) != xCol { - log.Errorf("x slice length %d != x col %d", len(xSlice), xCol) + if len(xSlice) != xWidth { + log.Errorf("x slice length %d != x col %d", len(xSlice), xWidth) return } - x.SetRow(i, xSlice) + copy(xData[i*xWidth:], xSlice) + if DebugItemId == sKey.ItemId && (DebugUserId == 0 || DebugUserId == sKey.UserId) { log.Infof("user %d: item %d: feature %v", sKey.UserId, sKey.ItemId, xSlice) debugIds = append(debugIds, i) } } - recSys.Predict(x, y) + xDense := tensor.NewDense(tensor.Float32, tensor.Shape{len(sampleKeys), xWidth}, tensor.WithBacking(xData)) + + y = recSys.Predict(xDense) for _, i := range debugIds { - log.Infof("user %d: item %d: score %v", sampleKeys[i].UserId, sampleKeys[i].ItemId, y.At(i, 0)) + score, er := y.At(i, 0) + if er != nil { + log.Errorf("get score of line:%d error: %v", i, er) + return + } + log.Infof("user %d: item %d: score %v", sampleKeys[i].UserId, sampleKeys[i].ItemId, score) } return } func GetSample(recSys RecSys, ctx context.Context) (sample *TrainSample, err error) { var ( - sampleWidth int userFeatureWidth int itemFeatureWidth int ) @@ -409,37 +426,46 @@ func GetSample(recSys RecSys, ctx context.Context) (sample *TrainSample, err err return } - if sampleWidth == 0 { - sampleWidth = len(sv.vec) + if sample.XCols == 0 { + sample.XCols = len(sv.vec) } else { - if len(sv.vec) != sampleWidth { - err = fmt.Errorf("sample width mismatch: %v:%v", sampleWidth, len(sv.vec)) + if len(sv.vec) != sample.XCols { + err = fmt.Errorf("sample width mismatch: %v:%v", sample.XCols, len(sv.vec)) return } } - sample.Data = append(sample.Data, ps.Sample{ - Input: sv.vec, - Response: []float64{sv.label}, - }) - if len(sample.Data)%1000 == 0 { - log.Infof("sample size: %d, uc: %d, ic: %d", len(sample.Data), + sample.X = append(sample.X, sv.vec...) + sample.Y = append(sample.Y, sv.label) + sample.Rows++ + if sample.Rows%1000 == 0 { + log.Infof("sample size: %d, uc: %d, ic: %d", sample.Rows, UserFeatureCache.ItemCount(), ItemFeatureCache.ItemCount(), ) } } + //check x and y dimension + if sample.Rows != len(sample.Y) { + err = fmt.Errorf("sample rows not match: %v:%v", sample.Rows, len(sample.Y)) + return + } + if sample.Rows*sample.XCols != len(sample.X) { + err = fmt.Errorf("sample x size not match: %v:%v", sample.Rows*sample.XCols, len(sample.X)) + return + } + return } func GetSampleVector(ctx context.Context, userFeatureCache *ccache.Cache, itemFeatureCache *ccache.Cache, featureProvider BasicFeatureProvider, sampleKey *Sample, -) (vec []float64, userFeatureWidth int, itemFeatureWidth int, err error) { +) (vec []float32, userFeatureWidth int, itemFeatureWidth int, err error) { var ( - zeroItemEmb [ItemEmbDim]float64 - zeroUserBehaviors [ItemEmbDim * UserBehaviorLen]float64 + zeroItemEmb [ItemEmbDim]float32 + zeroUserBehaviors [ItemEmbDim * UserBehaviorLen]float32 user, item *ccache.Item ) @@ -504,7 +530,7 @@ func GetSampleVector(ctx context.Context, } } - vec = utils.ConcatSlice(userFeature, userBehaviors, itemEmb, itemFeature) + vec = utils.ConcatSlice32(userFeature, userBehaviors, itemEmb, itemFeature) return } diff --git a/utils/util.go b/utils/util.go index efd827f..a44018c 100644 --- a/utils/util.go +++ b/utils/util.go @@ -19,6 +19,14 @@ func ConcatSlice(slices ...[]float64) []float64 { return result } +func ConcatSlice32(slices ...[]float32) []float32 { + result := make([]float32, 0) + for _, slice := range slices { + result = append(result, slice...) + } + return result +} + func Float64toBytes(f float64) []byte { bits := math.Float64bits(f) bytes := make([]byte, 8) From 27707fd4e0d0deb687aa49a3286605c314f45f73 Mon Sep 17 00:00:00 2001 From: auxten Date: Tue, 1 Nov 2022 16:25:07 +0800 Subject: [PATCH 2/4] FillTensorRows for train and predict --- example/movielens/dinimpl.go | 10 +-- example/movielens/dinimpl_test.go | 8 +- example/movielens/mlpimpl_test.go | 4 +- model/din/model.go | 120 ++++++++++++++++++------------ model/din/model_test.go | 5 +- utils/util.go | 63 ++++++++++++++-- utils/util_test.go | 4 +- 7 files changed, 147 insertions(+), 67 deletions(-) diff --git a/example/movielens/dinimpl.go b/example/movielens/dinimpl.go index d816869..f574da4 100644 --- a/example/movielens/dinimpl.go +++ b/example/movielens/dinimpl.go @@ -16,8 +16,8 @@ type dinImpl struct { iFeatureDim int cFeatureDim int - predBatchSize int - batchSize, epochs int + PredBatchSize int + BatchSize, epochs int sampleInfo *rcmd.SampleInfo // stop training on earlyStop count of no cost improvement @@ -30,7 +30,7 @@ type dinImpl struct { func (d *dinImpl) Predict(X tensor.Tensor) tensor.Tensor { numPred := X.Shape()[0] - y, err := din.Predict(d.pred, numPred, d.predBatchSize, d.sampleInfo, X) + y, err := din.Predict(d.pred, numPred, d.PredBatchSize, d.sampleInfo, X) if err != nil { log.Errorf("predict din model failed: %v", err) return nil @@ -60,7 +60,7 @@ func (d *dinImpl) Fit(trainSample *rcmd.TrainSample) (pred rcmd.PredictAbstract, d.learner = din.NewDinNet(d.uProfileDim, d.uBehaviorSize, d.uBehaviorDim, d.iFeatureDim, d.cFeatureDim) err = din.Train(d.uProfileDim, d.uBehaviorSize, d.uBehaviorDim, d.iFeatureDim, d.cFeatureDim, - trainSample.Rows, d.batchSize, d.epochs, d.earlyStop, + trainSample.Rows, d.BatchSize, d.epochs, d.earlyStop, d.sampleInfo, inputs, labels, d.learner, @@ -80,7 +80,7 @@ func (d *dinImpl) Fit(trainSample *rcmd.TrainSample) (pred rcmd.PredictAbstract, return } err = din.InitForwardOnlyVm(d.uProfileDim, d.uBehaviorSize, d.uBehaviorDim, d.iFeatureDim, d.cFeatureDim, - d.predBatchSize, dinPred) + d.PredBatchSize, dinPred) if err != nil { log.Errorf("init forward only vm failed: %v", err) return diff --git a/example/movielens/dinimpl_test.go b/example/movielens/dinimpl_test.go index 6e63c85..60d6b3b 100644 --- a/example/movielens/dinimpl_test.go +++ b/example/movielens/dinimpl_test.go @@ -6,8 +6,8 @@ import ( "math/rand" "testing" - "github.com/auxten/edgeRec/model/din" rcmd "github.com/auxten/edgeRec/recommend" + "github.com/auxten/edgeRec/utils" . "github.com/smartystreets/goconvey/convey" ) @@ -32,8 +32,8 @@ func TestDinOnMovielens(t *testing.T) { Convey("Train din model", t, func() { dinModel := &dinImpl{ - predBatchSize: 100, - batchSize: 200, + PredBatchSize: 100, + BatchSize: 200, epochs: 100, earlyStop: 20, } @@ -73,7 +73,7 @@ func TestDinOnMovielens(t *testing.T) { } yPred, err := rcmd.BatchPredict(batchPredictCtx, dinPred, sampleKeys) So(err, ShouldBeNil) - rocAuc := din.RocAuc32(yPred.Data().([]float32), yTrue) + rocAuc := utils.RocAuc32(yPred.Data().([]float32), yTrue) //rocAuc := metrics.ROCAUCScore(yTrue, yPred, "", nil) rowCount := len(yTrue) fmt.Printf("rocAuc on test set %d: %f\n", rowCount, rocAuc) diff --git a/example/movielens/mlpimpl_test.go b/example/movielens/mlpimpl_test.go index c203e59..66cce7f 100644 --- a/example/movielens/mlpimpl_test.go +++ b/example/movielens/mlpimpl_test.go @@ -6,8 +6,8 @@ import ( "math/rand" "testing" - "github.com/auxten/edgeRec/model/din" rcmd "github.com/auxten/edgeRec/recommend" + "github.com/auxten/edgeRec/utils" . "github.com/smartystreets/goconvey/convey" ) @@ -70,7 +70,7 @@ func TestSimpleMLPOnMovielens(t *testing.T) { } yPred, err := rcmd.BatchPredict(batchPredictCtx, dinPred, sampleKeys) So(err, ShouldBeNil) - rocAuc := din.RocAuc32(yPred.Data().([]float32), yTrue) + rocAuc := utils.RocAuc32(yPred.Data().([]float32), yTrue) rowCount := len(yTrue) fmt.Printf("rocAuc on test set %d: %f\n", rowCount, rocAuc) }) diff --git a/model/din/model.go b/model/din/model.go index 3748354..cec27bb 100644 --- a/model/din/model.go +++ b/model/din/model.go @@ -4,10 +4,8 @@ import ( "fmt" "math" - "github.com/auxten/edgeRec/nn/metrics" rcmd "github.com/auxten/edgeRec/recommend" log "github.com/sirupsen/logrus" - "gonum.org/v1/gonum/mat" "gopkg.in/cheggaaa/pb.v1" G "gorgonia.org/gorgonia" "gorgonia.org/tensor" @@ -107,6 +105,9 @@ func Train(uProfileDim, uBehaviorSize, uBehaviorDim, iFeatureDim, cFeatureDim in // handlePprof(sigChan, doneChan) batches := numExamples / batchSize + if numExamples%batchSize != 0 { + batches++ + } log.Printf("Batches %d", batches) bar := pb.New(batches) var ( @@ -139,6 +140,11 @@ func Train(uProfileDim, uBehaviorSize, uBehaviorDim, iFeatureDim, cFeatureDim in if xUserProfileVal, err = inputs.Slice([]tensor.Slice{G.S(start, end), G.S(si.UserProfileRange[0], si.UserProfileRange[1])}...); err != nil { log.Fatalf("Unable to slice xUserProfileVal %v", err) } + if xUserProfileVal.Shape()[0] < batchSize { + if xUserProfileVal, err = FillTensorRows(batchSize, xUserProfileVal); err != nil { + log.Fatalf("Unable to fill sample rows %v", err) + } + } if err = G.Let(xUserProfile, xUserProfileVal); err != nil { log.Fatalf("Unable to let xUserProfileVal %v", err) } @@ -146,6 +152,11 @@ func Train(uProfileDim, uBehaviorSize, uBehaviorDim, iFeatureDim, cFeatureDim in if xUserBehaviorsVal, err = inputs.Slice([]tensor.Slice{G.S(start, end), G.S(si.UserBehaviorRange[0], si.UserBehaviorRange[1])}...); err != nil { log.Fatalf("Unable to slice xUserBehaviorsVal %v", err) } + if xUserBehaviorsVal.Shape()[0] < batchSize { + if xUserBehaviorsVal, err = FillTensorRows(batchSize, xUserBehaviorsVal); err != nil { + log.Fatalf("Unable to fill sample rows %v", err) + } + } if err = G.Let(xUserBehaviorMatrix, xUserBehaviorsVal); err != nil { log.Fatalf("Unable to let xUserBehaviorsVal %v", err) } @@ -153,6 +164,11 @@ func Train(uProfileDim, uBehaviorSize, uBehaviorDim, iFeatureDim, cFeatureDim in if xItemFeatureVal, err = inputs.Slice([]tensor.Slice{G.S(start, end), G.S(si.ItemFeatureRange[0], si.ItemFeatureRange[1])}...); err != nil { log.Fatalf("Unable to slice xItemFeatureVal %v", err) } + if xItemFeatureVal.Shape()[0] < batchSize { + if xItemFeatureVal, err = FillTensorRows(batchSize, xItemFeatureVal); err != nil { + log.Fatalf("Unable to fill sample rows %v", err) + } + } if err = G.Let(xItemFeature, xItemFeatureVal); err != nil { log.Fatalf("Unable to let xItemFeatureVal %v", err) } @@ -160,6 +176,11 @@ func Train(uProfileDim, uBehaviorSize, uBehaviorDim, iFeatureDim, cFeatureDim in if xCtxFeatureVal, err = inputs.Slice([]tensor.Slice{G.S(start, end), G.S(si.CtxFeatureRange[0], si.CtxFeatureRange[1])}...); err != nil { log.Fatalf("Unable to slice xCtxFeatureVal %v", err) } + if xCtxFeatureVal.Shape()[0] < batchSize { + if xCtxFeatureVal, err = FillTensorRows(batchSize, xCtxFeatureVal); err != nil { + log.Fatalf("Unable to fill sample rows %v", err) + } + } if err = G.Let(xCtxFeature, xCtxFeatureVal); err != nil { log.Fatalf("Unable to let xCtxFeatureVal %v", err) } @@ -167,6 +188,11 @@ func Train(uProfileDim, uBehaviorSize, uBehaviorDim, iFeatureDim, cFeatureDim in if yVal, err = targets.Slice(G.S(start, end)); err != nil { log.Fatalf("Unable to slice y %v", err) } + if yVal.Shape()[0] < batchSize { + if yVal, err = FillTensorRows(batchSize, yVal); err != nil { + log.Fatalf("Unable to fill sample rows %v", err) + } + } if err = G.Let(y, yVal); err != nil { log.Fatalf("Unable to let y %v", err) } @@ -239,8 +265,11 @@ func Predict(m Model, numExamples, batchSize int, si *rcmd.SampleInfo, inputs te vm := m.Vm() batches := numExamples / batchSize + if numExamples%batchSize != 0 { + batches++ + } - for b := 0; b <= batches; b++ { + for b := 0; b < batches; b++ { start := b * batchSize end := start + batchSize if start >= numExamples { @@ -257,37 +286,61 @@ func Predict(m Model, numExamples, batchSize int, si *rcmd.SampleInfo, inputs te xCtxFeatureVal tensor.Tensor ) - if xUserProfileVal, err = inputs.Slice([]tensor.Slice{G.S(start, start+batchSize), G.S(si.UserProfileRange[0], si.UserProfileRange[1])}...); err != nil { + if xUserProfileVal, err = inputs.Slice([]tensor.Slice{G.S(start, end), G.S(si.UserProfileRange[0], si.UserProfileRange[1])}...); err != nil { log.Errorf("Unable to slice xUserProfileVal %v", err) return nil, err } + if xUserProfileVal.Shape()[0] < batchSize { + if xUserProfileVal, err = FillTensorRows(batchSize, xUserProfileVal); err != nil { + log.Errorf("Unable to fill input rows %v", err) + return nil, err + } + } if err = G.Let(xUserProfile, xUserProfileVal); err != nil { log.Errorf("Unable to let xUserProfileVal %v", err) return nil, err } - if xUserBehaviorsVal, err = inputs.Slice([]tensor.Slice{G.S(start, start+batchSize), G.S(si.UserBehaviorRange[0], si.UserBehaviorRange[1])}...); err != nil { + if xUserBehaviorsVal, err = inputs.Slice([]tensor.Slice{G.S(start, end), G.S(si.UserBehaviorRange[0], si.UserBehaviorRange[1])}...); err != nil { log.Errorf("Unable to slice xUserBehaviorsVal %v", err) return nil, err } + if xUserBehaviorsVal.Shape()[0] < batchSize { + if xUserBehaviorsVal, err = FillTensorRows(batchSize, xUserBehaviorsVal); err != nil { + log.Errorf("Unable to fill input rows %v", err) + return nil, err + } + } if err = G.Let(xUbMatrix, xUserBehaviorsVal); err != nil { log.Errorf("Unable to let xUserBehaviorsVal %v", err) return nil, err } - if xItemFeatureVal, err = inputs.Slice([]tensor.Slice{G.S(start, start+batchSize), G.S(si.ItemFeatureRange[0], si.ItemFeatureRange[1])}...); err != nil { + if xItemFeatureVal, err = inputs.Slice([]tensor.Slice{G.S(start, end), G.S(si.ItemFeatureRange[0], si.ItemFeatureRange[1])}...); err != nil { log.Errorf("Unable to slice xItemFeatureVal %v", err) return nil, err } + if xItemFeatureVal.Shape()[0] < batchSize { + if xItemFeatureVal, err = FillTensorRows(batchSize, xItemFeatureVal); err != nil { + log.Errorf("Unable to fill input rows %v", err) + return nil, err + } + } if err = G.Let(xItemFeature, xItemFeatureVal); err != nil { log.Errorf("Unable to let xItemFeatureVal %v", err) return nil, err } - if xCtxFeatureVal, err = inputs.Slice([]tensor.Slice{G.S(start, start+batchSize), G.S(si.CtxFeatureRange[0], si.CtxFeatureRange[1])}...); err != nil { + if xCtxFeatureVal, err = inputs.Slice([]tensor.Slice{G.S(start, end), G.S(si.CtxFeatureRange[0], si.CtxFeatureRange[1])}...); err != nil { log.Errorf("Unable to slice xCtxFeatureVal %v", err) return nil, err } + if xCtxFeatureVal.Shape()[0] < batchSize { + if xCtxFeatureVal, err = FillTensorRows(batchSize, xCtxFeatureVal); err != nil { + log.Errorf("Unable to fill input rows %v", err) + return nil, err + } + } if err = G.Let(xCtxFeature, xCtxFeatureVal); err != nil { log.Errorf("Unable to let xCtxFeatureVal %v", err) return nil, err @@ -309,46 +362,21 @@ func Predict(m Model, numExamples, batchSize int, si *rcmd.SampleInfo, inputs te return } -func accuracy(prediction, y []float64) float64 { - var ok float64 - for i := 0; i < len(prediction); i++ { - if math.Round(float64(prediction[i]-y[i])) == 0 { - ok += 1.0 - } - } - return ok / float64(len(y)) -} - -func RocAuc(pred, y []float64) float64 { - boolY := make([]float64, len(y)) - for i := 0; i < len(y); i++ { - if y[i] > 0.5 { - boolY[i] = 1.0 - } else { - boolY[i] = 0.0 - } - } - yTrue := mat.NewDense(len(y), 1, boolY) - yScore := mat.NewDense(len(pred), 1, pred) - - return metrics.ROCAUCScore(yTrue, yScore, "", nil) -} - -func RocAuc32(pred, y []float32) float32 { - boolY := make([]float64, len(y)) - for i := 0; i < len(y); i++ { - if y[i] > 0.5 { - boolY[i] = 1.0 - } else { - boolY[i] = 0.0 - } +// FillTensorRows fills the batch samples with the zero data to make sample size fit the batch size +// it sames tensor.Concat is not optimized for large dataset. +// we should avoid using FillTensorRows while input data is large. +func FillTensorRows(batchSize int, inputs tensor.Tensor) (x tensor.Tensor, err error) { + numExamples := inputs.Shape()[0] + fillSize := numExamples % batchSize + if fillSize == 0 { + return inputs, nil } - pred64 := make([]float64, len(pred)) - for i := 0; i < len(pred); i++ { - pred64[i] = float64(pred[i]) + inputZeros := tensor.New(tensor.WithShape(batchSize-fillSize, inputs.Shape()[1]), + tensor.WithBacking(make([]float32, (batchSize-fillSize)*inputs.Shape()[1]))) + x, err = tensor.Concat(0, inputs, inputZeros) + if err != nil { + return nil, err } - yTrue := mat.NewDense(len(y), 1, boolY) - yScore := mat.NewDense(len(pred), 1, pred64) - return float32(metrics.ROCAUCScore(yTrue, yScore, "", nil)) + return } diff --git a/model/din/model_test.go b/model/din/model_test.go index acb3de6..4904616 100644 --- a/model/din/model_test.go +++ b/model/din/model_test.go @@ -6,6 +6,7 @@ import ( "testing" rcmd "github.com/auxten/edgeRec/recommend" + "github.com/auxten/edgeRec/utils" log "github.com/sirupsen/logrus" . "github.com/smartystreets/goconvey/convey" "gorgonia.org/tensor" @@ -103,7 +104,7 @@ func TestMultiModel(t *testing.T) { So(predictions, ShouldNotBeNil) So(predictions, ShouldHaveLength, testSamples) log.Debugf("predictions: %+v", predictions) - auc := RocAuc32(predictions, labels.Data().([]float32)[:testSamples]) + auc := utils.RocAuc32(predictions, labels.Data().([]float32)[:testSamples]) log.Debugf("auc: %f", auc) So(auc, ShouldBeGreaterThan, 0.5) }) @@ -136,7 +137,7 @@ func TestMultiModel(t *testing.T) { So(predictions, ShouldNotBeNil) So(predictions, ShouldHaveLength, testSamples) log.Debugf("predictions: %+v", predictions) - auc := RocAuc32(predictions, labels.Data().([]float32)[:testSamples]) + auc := utils.RocAuc32(predictions, labels.Data().([]float32)[:testSamples]) log.Debugf("auc: %f", auc) So(auc, ShouldBeGreaterThan, 0.5) }) diff --git a/utils/util.go b/utils/util.go index a44018c..cd9f64f 100644 --- a/utils/util.go +++ b/utils/util.go @@ -7,8 +7,8 @@ import ( "strconv" "strings" - "gonum.org/v1/gonum/integrate" - "gonum.org/v1/gonum/stat" + "github.com/auxten/edgeRec/nn/metrics" + "gonum.org/v1/gonum/mat" ) func ConcatSlice(slices ...[]float64) []float64 { @@ -68,11 +68,6 @@ func TopNOccurrences(s []string, n int) []KeyCnt { return l1[:n] } -func RocAuc(label []bool, y []float64) float64 { - tpr, fpr, _ := stat.ROC(nil, y, label, nil) - return integrate.Trapezoidal(fpr, tpr) -} - func ParseInt64Seq(s string) []int64 { var ( seq []int64 @@ -97,3 +92,57 @@ func Int64SeqToIntSeq(seq []int64) []int { } return result } + +func Accuracy(prediction, y []float64) float64 { + var ok float64 + for i := 0; i < len(prediction); i++ { + if math.Round(float64(prediction[i]-y[i])) == 0 { + ok += 1.0 + } + } + return ok / float64(len(y)) +} + +func Accuracy32(prediction, y []float32) float32 { + var ok float32 + for i := 0; i < len(prediction); i++ { + if math.Round(float64(prediction[i]-y[i])) == 0 { + ok += 1.0 + } + } + return ok / float32(len(y)) +} + +func RocAuc(pred, y []float64) float64 { + boolY := make([]float64, len(y)) + for i := 0; i < len(y); i++ { + if y[i] > 0.5 { + boolY[i] = 1.0 + } else { + boolY[i] = 0.0 + } + } + yTrue := mat.NewDense(len(y), 1, boolY) + yScore := mat.NewDense(len(pred), 1, pred) + + return metrics.ROCAUCScore(yTrue, yScore, "", nil) +} + +func RocAuc32(pred, y []float32) float32 { + boolY := make([]float64, len(y)) + for i := 0; i < len(y); i++ { + if y[i] > 0.5 { + boolY[i] = 1.0 + } else { + boolY[i] = 0.0 + } + } + pred64 := make([]float64, len(pred)) + for i := 0; i < len(pred); i++ { + pred64[i] = float64(pred[i]) + } + yTrue := mat.NewDense(len(y), 1, boolY) + yScore := mat.NewDense(len(pred), 1, pred64) + + return float32(metrics.ROCAUCScore(yTrue, yScore, "", nil)) +} diff --git a/utils/util_test.go b/utils/util_test.go index da7b738..79e4188 100644 --- a/utils/util_test.go +++ b/utils/util_test.go @@ -24,7 +24,9 @@ func TestUtils(t *testing.T) { func TestGetAUC(t *testing.T) { Convey("test auc", t, func() { - auc := RocAuc([]bool{false, true, false, true}, []float64{0.1, 0.35, 0.4, 0.8}) + auc := RocAuc([]float64{0.1, 0.35, 0.4, 0.8}, []float64{0, 1, 0, 1}) + So(auc, ShouldEqual, .75) + auc = RocAuc([]float64{0.1, 0.4, 0.35, 0.8}, []float64{0, 0, 1, 1}) So(auc, ShouldEqual, .75) }) } From 577ff2b1d1f0cc99db77ec590444f3a61a4ec7ac Mon Sep 17 00:00:00 2001 From: auxten Date: Tue, 1 Nov 2022 16:27:51 +0800 Subject: [PATCH 3/4] Add args for movielens-20m --- example/movielens/dinimpl_test.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/example/movielens/dinimpl_test.go b/example/movielens/dinimpl_test.go index 60d6b3b..bbd0be8 100644 --- a/example/movielens/dinimpl_test.go +++ b/example/movielens/dinimpl_test.go @@ -22,6 +22,8 @@ func TestDinOnMovielens(t *testing.T) { var ( movielens = &MovielensRec{ + //DataPath: "movielens-20m.db", + //SampleCnt: 14400000, DataPath: "movielens.db", SampleCnt: 79948, //SampleCnt: 10000, @@ -33,9 +35,10 @@ func TestDinOnMovielens(t *testing.T) { Convey("Train din model", t, func() { dinModel := &dinImpl{ PredBatchSize: 100, - BatchSize: 200, - epochs: 100, - earlyStop: 20, + //BatchSize: 5000, + BatchSize: 200, + epochs: 100, + earlyStop: 20, } trainCtx := context.Background() model, err = rcmd.Train(trainCtx, movielens, dinModel) @@ -44,6 +47,7 @@ func TestDinOnMovielens(t *testing.T) { }) Convey("Predict din model", t, func() { + //testCount := 5610000 testCount := 20600 rows, err := db.Query( "SELECT userId, movieId, rating, timestamp FROM ratings_test ORDER BY timestamp, userId ASC LIMIT ?", testCount) From 85e96f0ebb71640f9acacb33248f3bd3981271da Mon Sep 17 00:00:00 2001 From: auxten Date: Tue, 1 Nov 2022 19:08:25 +0800 Subject: [PATCH 4/4] Large predict batch size --- example/movielens/dinimpl_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/example/movielens/dinimpl_test.go b/example/movielens/dinimpl_test.go index bbd0be8..09b5c2b 100644 --- a/example/movielens/dinimpl_test.go +++ b/example/movielens/dinimpl_test.go @@ -34,10 +34,11 @@ func TestDinOnMovielens(t *testing.T) { Convey("Train din model", t, func() { dinModel := &dinImpl{ + //PredBatchSize: 5000, PredBatchSize: 100, //BatchSize: 5000, BatchSize: 200, - epochs: 100, + epochs: 200, earlyStop: 20, } trainCtx := context.Background()