Skip to content

Commit 842fa70

Browse files
committed
Move fc layer into shared params
1 parent 8dba5f9 commit 842fa70

File tree

1 file changed

+19
-17
lines changed

1 file changed

+19
-17
lines changed

PolicyGradient/a3c/estimators.py

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,12 @@ def __init__(self, num_outputs, reuse=False, trainable=True):
3030
conv3 = tf.contrib.layers.conv2d(
3131
conv2, 64, 3, 1, activation_fn=tf.nn.relu)
3232

33-
with tf.variable_scope("policy_net"):
3433
flattened = tf.contrib.layers.flatten(conv3)
35-
fc1 = tf.contrib.layers.fully_connected(flattened, 512)
34+
fc1 = tf.contrib.layers.fully_connected(
35+
inputs=flattened,
36+
num_outputs=256)
37+
38+
with tf.variable_scope("policy_net"):
3639
self.logits = tf.contrib.layers.fully_connected(fc1, num_outputs)
3740
self.probs = tf.nn.softmax(self.logits)
3841

@@ -65,8 +68,7 @@ def __init__(self, num_outputs, reuse=False, trainable=True):
6568
global_step=tf.contrib.framework.get_global_step(),
6669
learning_rate=0.00025,
6770
optimizer=self.optimizer,
68-
clip_gradients=5.0,
69-
name="policy_net",
71+
# clip_gradients=5.0,
7072
summaries=tf.contrib.layers.optimizers.OPTIMIZER_SUMMARIES)
7173

7274
summary_ops = tf.get_collection(tf.GraphKeys.SUMMARIES)
@@ -99,14 +101,15 @@ def __init__(self, reuse=False, trainable=True):
99101
conv3 = tf.contrib.layers.conv2d(
100102
conv2, 64, 3, 1, activation_fn=tf.nn.relu)
101103

102-
with tf.variable_scope("value_net"):
103104
flattened = tf.contrib.layers.flatten(conv3)
104105
fc1 = tf.contrib.layers.fully_connected(
105106
inputs=flattened,
106-
num_outputs=512,
107-
weights_initializer=tf.zeros_initializer,
108-
biases_initializer=tf.zeros_initializer)
109-
self.logits = tf.contrib.layers.fully_connected(fc1, 1)
107+
num_outputs=256)
108+
109+
with tf.variable_scope("value_net"):
110+
self.logits = tf.contrib.layers.fully_connected(
111+
inputs=fc1,
112+
num_outputs=1)
110113
self.logits = tf.squeeze(self.logits, squeeze_dims=[1])
111114

112115
self.losses = tf.squared_difference(self.logits, self.targets)
@@ -127,18 +130,17 @@ def __init__(self, reuse=False, trainable=True):
127130
global_step=tf.contrib.framework.get_global_step(),
128131
learning_rate=0.00025,
129132
optimizer=self.optimizer,
130-
clip_gradients=5.0,
131-
name="value_net",
133+
# clip_gradients=5.0,
132134
summaries=tf.contrib.layers.optimizers.OPTIMIZER_SUMMARIES)
133135

134136
# Summaries
135137
max_value = tf.reduce_max(self.logits)
136-
tf.scalar_summary("value_net_loss", self.loss)
137-
tf.scalar_summary("max_value", max_value)
138-
tf.histogram_summary("reward_targets", self.targets)
139-
tf.scalar_summary("max_reward", tf.reduce_max(self.targets))
140-
tf.scalar_summary("min_reward", tf.reduce_min(self.targets))
141-
tf.scalar_summary("mean_reward", tf.reduce_mean(self.targets))
138+
tf.scalar_summary("value_net/loss", self.loss)
139+
tf.scalar_summary("value_net/max_value", max_value)
140+
tf.histogram_summary("value_net/reward_targets", self.targets)
141+
tf.scalar_summary("value_net/reward_max", tf.reduce_max(self.targets))
142+
tf.scalar_summary("value_net/reward_min", tf.reduce_min(self.targets))
143+
tf.scalar_summary("value_net/reward_mean", tf.reduce_mean(self.targets))
142144

143145
summary_ops = tf.get_collection(tf.GraphKeys.SUMMARIES)
144146
self.summaries = tf.merge_summary([s for s in summary_ops if "value_net" in s.name])

0 commit comments

Comments (0)