10
10
def train ():
11
11
parser = argparse .ArgumentParser ()
12
12
13
- parser .add_argument ("-c" , "--train_dataset" , required = True , type = str )
14
- parser .add_argument ("-t" , "--test_dataset" , type = str , default = None )
15
- parser .add_argument ("-v" , "--vocab_path" , required = True , type = str )
16
- parser .add_argument ("-o" , "--output_path" , required = True , type = str )
17
-
18
- parser .add_argument ("-hs" , "--hidden" , type = int , default = 256 )
19
- parser .add_argument ("-l" , "--layers" , type = int , default = 8 )
20
- parser .add_argument ("-a" , "--attn_heads" , type = int , default = 8 )
21
- parser .add_argument ("-s" , "--seq_len" , type = int , default = 20 )
22
-
23
- parser .add_argument ("-b" , "--batch_size" , type = int , default = 64 )
24
- parser .add_argument ("-e" , "--epochs" , type = int , default = 10 )
25
- parser .add_argument ("-w" , "--num_workers" , type = int , default = 5 )
26
-
27
- parser .add_argument ("--with_cuda" , type = bool , default = True )
28
- parser .add_argument ("--log_freq" , type = int , default = 10 )
29
- parser .add_argument ("--corpus_lines" , type = int , default = None )
30
- parser .add_argument ("--cuda_devices" , type = int , nargs = '+' , default = None )
31
-
32
- parser .add_argument ("--lr" , type = float , default = 1e-3 )
33
- parser .add_argument ("--adam_weight_decay" , type = float , default = 0.01 )
34
- parser .add_argument ("--adam_beta1" , type = float , default = 0.9 )
35
- parser .add_argument ("--adam_beta2" , type = float , default = 0.999 )
13
+ parser .add_argument ("-c" , "--train_dataset" , required = True , type = str , help = "train dataset for train bert" )
14
+ parser .add_argument ("-t" , "--test_dataset" , type = str , default = None , help = "test set for evaluate train set" )
15
+ parser .add_argument ("-v" , "--vocab_path" , required = True , type = str , help = "built vocab model path with bert-vocab" )
16
+ parser .add_argument ("-o" , "--output_path" , required = True , type = str , help = "ex)output/bert.model" )
17
+
18
+ parser .add_argument ("-hs" , "--hidden" , type = int , default = 256 , help = "hidden size of transformer model" )
19
+ parser .add_argument ("-l" , "--layers" , type = int , default = 8 , help = "number of layers" )
20
+ parser .add_argument ("-a" , "--attn_heads" , type = int , default = 8 , help = "number of attention heads" )
21
+ parser .add_argument ("-s" , "--seq_len" , type = int , default = 20 , help = "maximum sequence len" )
22
+
23
+ parser .add_argument ("-b" , "--batch_size" , type = int , default = 64 , help = "number of batch_size" )
24
+ parser .add_argument ("-e" , "--epochs" , type = int , default = 10 , help = "number of epochs" )
25
+ parser .add_argument ("-w" , "--num_workers" , type = int , default = 5 , help = "dataloader worker size" )
26
+
27
+ parser .add_argument ("--with_cuda" , type = bool , default = True , help = "training with CUDA: true, or false" )
28
+ parser .add_argument ("--log_freq" , type = int , default = 10 , help = "printing loss every n iter: setting n" )
29
+ parser .add_argument ("--corpus_lines" , type = int , default = None , help = "total number of lines in corpus" )
30
+ parser .add_argument ("--cuda_devices" , type = int , nargs = '+' , default = None , help = "CUDA device ids" )
31
+ parser .add_argument ("--on_memory" , type = bool , default = True , help = "Loading on memory: true or false" )
32
+
33
+ parser .add_argument ("--lr" , type = float , default = 1e-3 , help = "learning rate of adam" )
34
+ parser .add_argument ("--adam_weight_decay" , type = float , default = 0.01 , help = "weight_decay of adam" )
35
+ parser .add_argument ("--adam_beta1" , type = float , default = 0.9 , help = "adam first beta value" )
36
+ parser .add_argument ("--adam_beta2" , type = float , default = 0.999 , help = "adam second beta value" )
36
37
37
38
args = parser .parse_args ()
38
39
@@ -41,11 +42,12 @@ def train():
41
42
print ("Vocab Size: " , len (vocab ))
42
43
43
44
print ("Loading Train Dataset" , args .train_dataset )
44
- train_dataset = BERTDataset (args .train_dataset , vocab , seq_len = args .seq_len , corpus_lines = args .corpus_lines )
45
+ train_dataset = BERTDataset (args .train_dataset , vocab , seq_len = args .seq_len ,
46
+ corpus_lines = args .corpus_lines , on_memory = args .on_memory )
45
47
46
48
print ("Loading Test Dataset" , args .test_dataset )
47
- test_dataset = BERTDataset (args .test_dataset , vocab ,
48
- seq_len = args . seq_len ) if args .test_dataset is not None else None
49
+ test_dataset = BERTDataset (args .test_dataset , vocab , seq_len = args . seq_len , on_memory = args . on_memory ) \
50
+ if args .test_dataset is not None else None
49
51
50
52
print ("Creating Dataloader" )
51
53
train_data_loader = DataLoader (train_dataset , batch_size = args .batch_size , num_workers = args .num_workers )
0 commit comments