1
- from tensorflow .keras .optimizers import Adam
1
+ """
2
+ Created on Nov 20, 2021
3
+ train Caser demo
4
+ @author: Ziyao Geng([email protected] )
5
+ """
6
+ import os
2
7
from time import time
8
+ from tensorflow .keras .optimizers import Adam
9
+
3
10
from reclearn .models .matching import Caser
4
11
from reclearn .data .datasets import movielens as ml
5
12
from reclearn .evaluator import eval_pos_neg
6
- import os
13
+ from reclearn . data . feature_column import sparseFeature
7
14
8
15
# Quiet TensorFlow's C++ logging and pin the job to GPU 6.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
os.environ['CUDA_VISIBLE_DEVICES'] = '6'

# ---- Hyper parameters ----
# Data / sampling shape.
neg_num = 4        # negative samples per positive during training
seq_len = 200      # length of each user's behavior sequence
embed_dim = 64     # embedding size for user/item ids

# Optimization.
learning_rate = 0.001
epochs = 20
batch_size = 512

# Caser-specific arguments forwarded to the model constructor.
model_params = {
    'seq_len': seq_len,
    'hor_n': 8,                           # horizontal convolution: filter count
    'hor_h': 2,                           # horizontal convolution: filter height
    'ver_n': 4,                           # vertical convolution: filter count
    'dnn_dropout': 0.2,
    'loss_name': 'binary_entropy_loss',
    'embed_reg': 0.
}

# Cut-off for the top-K ranking metrics (HR@k, MRR@k, NDCG@k).
k = 10
def main(file_path='data/ml-1m/ratings.dat'):
    """Train a Caser model on MovieLens-1M and report ranking metrics per epoch.

    Args:
        file_path: path to the raw MovieLens `ratings.dat` file. Defaults to
            the location the original demo hard-coded, so `main()` with no
            arguments behaves exactly as before.
    """
    # TODO: 1. Split Data. split_seq_movielens writes the train/val/test
    # splits to disk and a meta file holding the max user/item ids.
    train_path, val_path, test_path, meta_path = ml.split_seq_movielens(file_path=file_path)
    with open(meta_path) as f:
        # Meta file's first line: "<max_user_id>\t<max_item_id>".
        max_user_num, max_item_num = [int(x) for x in f.readline().strip('\n').split('\t')]
    # TODO: 2. Build Feature Columns.
    # "+ 1" presumably reserves id 0 for padding — verify against sparseFeature.
    fea_cols = {
        'user': sparseFeature('user', max_user_num + 1, embed_dim),
        'item': sparseFeature('item', max_item_num + 1, embed_dim)
    }
    # TODO: 3. Load Data. The test split uses 100 negatives per positive for
    # ranking evaluation, independent of the training-time neg_num.
    train_data = ml.load_seq_ml(train_path, "train", seq_len, neg_num, max_item_num, contain_user=True)
    val_data = ml.load_seq_ml(val_path, "val", seq_len, neg_num, max_item_num, contain_user=True)
    test_data = ml.load_seq_ml(test_path, "test", seq_len, 100, max_item_num, contain_user=True)
    # TODO: 4. Build Model
    model = Caser(fea_cols, **model_params)
    model.compile(optimizer=Adam(learning_rate=learning_rate))
    # TODO: 5. Fit Model. One Keras epoch per loop iteration so the test-set
    # metrics can be computed and printed after every epoch.
    for epoch in range(1, epochs + 1):
        t1 = time()
        model.fit(
            x=train_data,
            epochs=1,
            validation_data=val_data,
            batch_size=batch_size
        )
        eval_dict = eval_pos_neg(model, test_data, ['hr', 'mrr', 'ndcg'], k, batch_size)
        t2 = time()
        print('Iteration %d Fit [%.1f s], Evaluate [%.1f s]: HR = %.4f, MRR = %.4f, NDCG = %.4f'
              % (epoch, t2 - t1, time() - t2, eval_dict['hr'], eval_dict['mrr'], eval_dict['ndcg']))
# Run the demo only when executed as a script, not when this module is imported.
if __name__ == '__main__':
    main()