1
- from tensorflow .keras .optimizers import Adam
1
+ """
2
+ Created on Nov 20, 2021
3
+ train Caser demo
4
+ @author: Ziyao Geng([email protected] )
5
+ """
6
+ import os
2
7
from time import time
8
+ from tensorflow .keras .optimizers import Adam
9
+
3
10
from reclearn .models .matching import Caser
4
11
from reclearn .data .datasets import movielens as ml
5
12
from reclearn .evaluator import eval_pos_neg
6
- import os
13
+ from reclearn . data . feature_column import sparseFeature
7
14
8
15
# Quiet TensorFlow's C++ logging and pin the job to GPU 6.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
os.environ['CUDA_VISIBLE_DEVICES'] = '6'

# ---- Hyper parameters ----
# Data / sampling shape.
neg_num = 4        # negative samples per positive during training
seq_len = 200      # length of each user's behavior sequence
embed_dim = 64     # embedding size for user/item ids

# Optimization.
learning_rate = 0.001
epochs = 20
batch_size = 512

# Caser-specific arguments forwarded to the model constructor.
model_params = {
    'seq_len': seq_len,
    'hor_n': 8,                           # horizontal convolution: filter count
    'hor_h': 2,                           # horizontal convolution: filter height
    'ver_n': 4,                           # vertical convolution: filter count
    'dnn_dropout': 0.2,
    'loss_name': 'binary_entropy_loss',
    'embed_reg': 0.
}

# Cut-off for the top-K ranking metrics (HR@k, MRR@k, NDCG@k).
k = 10
def main(file_path='data/ml-1m/ratings.dat'):
    """Train a Caser model on MovieLens-1M and report ranking metrics per epoch.

    Args:
        file_path: path to the raw MovieLens `ratings.dat` file. Defaults to
            the location the original demo hard-coded, so `main()` with no
            arguments behaves exactly as before.
    """
    # TODO: 1. Split Data. split_seq_movielens writes the train/val/test
    # splits to disk and a meta file holding the max user/item ids.
    train_path, val_path, test_path, meta_path = ml.split_seq_movielens(file_path=file_path)
    with open(meta_path) as f:
        # Meta file's first line: "<max_user_id>\t<max_item_id>".
        max_user_num, max_item_num = [int(x) for x in f.readline().strip('\n').split('\t')]
    # TODO: 2. Build Feature Columns.
    # "+ 1" presumably reserves id 0 for padding — verify against sparseFeature.
    fea_cols = {
        'user': sparseFeature('user', max_user_num + 1, embed_dim),
        'item': sparseFeature('item', max_item_num + 1, embed_dim)
    }
    # TODO: 3. Load Data. The test split uses 100 negatives per positive for
    # ranking evaluation, independent of the training-time neg_num.
    train_data = ml.load_seq_ml(train_path, "train", seq_len, neg_num, max_item_num, contain_user=True)
    val_data = ml.load_seq_ml(val_path, "val", seq_len, neg_num, max_item_num, contain_user=True)
    test_data = ml.load_seq_ml(test_path, "test", seq_len, 100, max_item_num, contain_user=True)
    # TODO: 4. Build Model
    model = Caser(fea_cols, **model_params)
    model.compile(optimizer=Adam(learning_rate=learning_rate))
    # TODO: 5. Fit Model. One Keras epoch per loop iteration so the test-set
    # metrics can be computed and printed after every epoch.
    for epoch in range(1, epochs + 1):
        t1 = time()
        model.fit(
            x=train_data,
            epochs=1,
            validation_data=val_data,
            batch_size=batch_size
        )
        eval_dict = eval_pos_neg(model, test_data, ['hr', 'mrr', 'ndcg'], k, batch_size)
        t2 = time()
        print('Iteration %d Fit [%.1f s], Evaluate [%.1f s]: HR = %.4f, MRR = %.4f, NDCG = %.4f'
              % (epoch, t2 - t1, time() - t2, eval_dict['hr'], eval_dict['mrr'], eval_dict['ndcg']))
# Run the demo only when executed as a script, not when this module is imported.
if __name__ == '__main__':
    main()