@@ -74,12 +74,12 @@ def __init__(self, num_outputs, reuse=False, trainable=True):
       tf.histogram_summary("policy_net/actions", self.actions)
 
       # Optimizer Parameters from original paper
-      self.optimizer = tf.train.RMSPropOptimizer(0.00025, 0.99, 0.0, 1e-6)
-      # self.optimizer = tf.train.AdamOptimizer(1e-4)
+      # self.optimizer = tf.train.RMSPropOptimizer(0.00025, 0.99, 0.0, 1e-6)
+      self.optimizer = tf.train.AdamOptimizer(1e-4)
       self.train_op = tf.contrib.layers.optimize_loss(
         loss=self.loss,
         global_step=tf.contrib.framework.get_global_step(),
-        learning_rate=0.00025,
+        learning_rate=1e-4,
         optimizer=self.optimizer,
         # clip_gradients=5.0,
         summaries=tf.contrib.layers.optimizers.OPTIMIZER_SUMMARIES)
@@ -126,12 +126,12 @@ def __init__(self, reuse=False, trainable=True):
       return
 
       # Optimizer Parameters from original paper
-      self.optimizer = tf.train.RMSPropOptimizer(0.00025, 0.99, 0.0, 1e-6)
-      # self.optimizer = tf.train.AdamOptimizer(1e-4)
+      # self.optimizer = tf.train.RMSPropOptimizer(0.00025, 0.99, 0.0, 1e-6)
+      self.optimizer = tf.train.AdamOptimizer(1e-4)
       self.train_op = tf.contrib.layers.optimize_loss(
         loss=self.loss,
         global_step=tf.contrib.framework.get_global_step(),
-        learning_rate=0.00025,
+        learning_rate=1e-4,
         optimizer=self.optimizer,
         # clip_gradients=5.0,
         summaries=tf.contrib.layers.optimizers.OPTIMIZER_SUMMARIES)