@@ -30,9 +30,12 @@ def __init__(self, num_outputs, reuse=False, trainable=True):
         conv3 = tf.contrib.layers.conv2d(
             conv2, 64, 3, 1, activation_fn=tf.nn.relu)
 
-        with tf.variable_scope("policy_net"):
         flattened = tf.contrib.layers.flatten(conv3)
-            fc1 = tf.contrib.layers.fully_connected(flattened, 512)
+        fc1 = tf.contrib.layers.fully_connected(
+            inputs=flattened,
+            num_outputs=256)
+
+        with tf.variable_scope("policy_net"):
             self.logits = tf.contrib.layers.fully_connected(fc1, num_outputs)
             self.probs = tf.nn.softmax(self.logits)
 
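Note: here and in the value estimator below, the flatten and fc1 layers move outside the "policy_net"/"value_net" scopes and fc1 drops from 512 to 256 units, which looks like preparation for sharing those lower layers between the two networks (both constructors already take a `reuse` flag). A minimal sketch of how reuse-based variable sharing works in this TensorFlow API; the scope and variable names below are illustrative, not taken from this commit:

    with tf.variable_scope("shared"):
        w1 = tf.get_variable("w", shape=[256, 4])    # creates shared/w
    with tf.variable_scope("shared", reuse=True):
        w2 = tf.get_variable("w", shape=[256, 4])    # returns the existing shared/w
    assert w1 is w2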
@@ -65,8 +68,7 @@ def __init__(self, num_outputs, reuse=False, trainable=True):
             global_step=tf.contrib.framework.get_global_step(),
             learning_rate=0.00025,
             optimizer=self.optimizer,
-            clip_gradients=5.0,
-            name="policy_net",
+            # clip_gradients=5.0,
             summaries=tf.contrib.layers.optimizers.OPTIMIZER_SUMMARIES)
 
         summary_ops = tf.get_collection(tf.GraphKeys.SUMMARIES)
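Note: with `clip_gradients=5.0` commented out, `tf.contrib.layers.optimize_loss` applies the raw gradients. Should clipping need to come back by hand, a rough equivalent (a sketch only, reusing the `self.loss`/`self.optimizer` attributes built above) clips by global norm before applying:

    grads_and_vars = self.optimizer.compute_gradients(self.loss)
    grads, variables = zip(*grads_and_vars)
    # Clip by global norm, which is what a float clip_gradients asks optimize_loss to do
    clipped_grads, _ = tf.clip_by_global_norm(grads, 5.0)
    self.train_op = self.optimizer.apply_gradients(
        list(zip(clipped_grads, variables)),
        global_step=tf.contrib.framework.get_global_step())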
@@ -99,14 +101,15 @@ def __init__(self, reuse=False, trainable=True):
         conv3 = tf.contrib.layers.conv2d(
             conv2, 64, 3, 1, activation_fn=tf.nn.relu)
 
-        with tf.variable_scope("value_net"):
         flattened = tf.contrib.layers.flatten(conv3)
         fc1 = tf.contrib.layers.fully_connected(
             inputs=flattened,
-            num_outputs=512,
-            weights_initializer=tf.zeros_initializer,
-            biases_initializer=tf.zeros_initializer)
-            self.logits = tf.contrib.layers.fully_connected(fc1, 1)
+            num_outputs=256)
+
+        with tf.variable_scope("value_net"):
+            self.logits = tf.contrib.layers.fully_connected(
+                inputs=fc1,
+                num_outputs=1)
             self.logits = tf.squeeze(self.logits, squeeze_dims=[1])
 
         self.losses = tf.squared_difference(self.logits, self.targets)
@@ -127,18 +130,17 @@ def __init__(self, reuse=False, trainable=True):
             global_step=tf.contrib.framework.get_global_step(),
             learning_rate=0.00025,
             optimizer=self.optimizer,
-            clip_gradients=5.0,
-            name="value_net",
+            # clip_gradients=5.0,
             summaries=tf.contrib.layers.optimizers.OPTIMIZER_SUMMARIES)
 
         # Summaries
         max_value = tf.reduce_max(self.logits)
-        tf.scalar_summary("value_net_loss", self.loss)
-        tf.scalar_summary("max_value", max_value)
-        tf.histogram_summary("reward_targets", self.targets)
-        tf.scalar_summary("max_reward", tf.reduce_max(self.targets))
-        tf.scalar_summary("min_reward", tf.reduce_min(self.targets))
-        tf.scalar_summary("mean_reward", tf.reduce_mean(self.targets))
+        tf.scalar_summary("value_net/loss", self.loss)
+        tf.scalar_summary("value_net/max_value", max_value)
+        tf.histogram_summary("value_net/reward_targets", self.targets)
+        tf.scalar_summary("value_net/reward_max", tf.reduce_max(self.targets))
+        tf.scalar_summary("value_net/reward_min", tf.reduce_min(self.targets))
+        tf.scalar_summary("value_net/reward_mean", tf.reduce_mean(self.targets))
 
         summary_ops = tf.get_collection(tf.GraphKeys.SUMMARIES)
         self.summaries = tf.merge_summary([s for s in summary_ops if "value_net" in s.name])
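Note: the new summary tags share a "value_net/" prefix, which TensorBoard uses to group them into a single section, and the last line keeps only the summary ops whose names mention the value net. A hypothetical debugging snippet (not part of this commit) to check what that substring filter actually picks up:

    for s in tf.get_collection(tf.GraphKeys.SUMMARIES):
        print(s.name, "value_net" in s.name)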