Commit 967c829

add reduce_mean for calculating curious loss
1 parent: 7517807

2 files changed (+3, -3 lines)

contents/Curiosity_Model/Curiosity.py (1 addition, 1 deletion)

@@ -76,7 +76,7 @@ def _build_dynamics_net(self, s, a, s_):
         squared_diff = tf.reduce_sum(tf.square(encoded_s_ - dyn_s_), axis=1) # intrinsic reward
 
         # It is better to reduce the learning rate in order to stay curious
-        train_op = tf.train.RMSPropOptimizer(self.lr, name="dyn_opt").minimize(squared_diff)
+        train_op = tf.train.RMSPropOptimizer(self.lr, name="dyn_opt").minimize(tf.reduce_mean(squared_diff))
         return dyn_s_, squared_diff, train_op
 
     def _build_dqn(self, s, a, r, s_):
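Why the mean matters here: in TensorFlow 1.x, minimize() differentiates via tf.gradients, which takes the gradient of the implicit sum of a non-scalar loss, so the old minimize(squared_diff) effectively trained on the batch sum and its gradient magnitude grew with batch size. Averaging first gives a batch-size-invariant scalar, while the unreduced per-sample vector remains available as the intrinsic reward. Below is a minimal, self-contained sketch of that pattern (TF 1.x assumed; feature_in, the layer shapes, and the learning rate are illustrative stand-ins, not the repo's actual graph):

import numpy as np
import tensorflow as tf

s_encode_size = 32                                              # illustrative size
encoded_s_ = tf.placeholder(tf.float32, [None, s_encode_size])  # target encoding
feature_in = tf.placeholder(tf.float32, [None, s_encode_size])  # hypothetical predictor input
dyn_s_ = tf.layers.dense(feature_in, s_encode_size)             # predicted encoding

# Per-sample prediction error, shape [batch]; kept unreduced as the intrinsic reward.
squared_diff = tf.reduce_sum(tf.square(encoded_s_ - dyn_s_), axis=1)

# Scalar loss: the mean keeps the gradient scale independent of batch size,
# whereas minimize(squared_diff) would differentiate the implicit batch sum.
train_op = tf.train.RMSPropOptimizer(0.001, name="dyn_opt").minimize(
    tf.reduce_mean(squared_diff))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    batch = np.random.randn(8, s_encode_size).astype(np.float32)
    r_int, _ = sess.run([squared_diff, train_op],
                        {encoded_s_: batch, feature_in: batch})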

contents/Curiosity_Model/Random_Network_Distillation.py (2 additions, 2 deletions)

@@ -74,9 +74,9 @@ def _build_predictor(self, s_, rand_encode_s_):
         out = tf.layers.dense(net, self.s_encode_size)
 
         with tf.name_scope("int_r"):
-            ri = squared_diff = tf.reduce_sum(tf.square(rand_encode_s_ - out), axis=1) # intrinsic reward
+            ri = tf.reduce_sum(tf.square(rand_encode_s_ - out), axis=1) # intrinsic reward
         train_op = tf.train.RMSPropOptimizer(self.lr, name="predictor_opt").minimize(
-            squared_diff, var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "predictor"))
+            tf.reduce_mean(ri), var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "predictor"))
 
         return ri, train_op
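The RND change applies the same split: ri stays a per-sample vector (it is the intrinsic reward returned to the agent), only its mean is minimized, and var_list restricts the update to the predictor scope so the randomly initialized target network stays fixed. A hedged sketch under those assumptions (TF 1.x; the input width, layer sizes, and learning rate are illustrative, not the repo's values):

import tensorflow as tf

s_encode_size = 16
s_ = tf.placeholder(tf.float32, [None, 4])  # next state; width is illustrative

with tf.variable_scope("random_net"):       # fixed target encoder, never trained
    rand_encode_s_ = tf.layers.dense(s_, s_encode_size, trainable=False)

with tf.variable_scope("predictor"):        # trained to imitate the random net
    net = tf.layers.dense(s_, 64, tf.nn.relu)
    out = tf.layers.dense(net, s_encode_size)

# Per-sample error, shape [batch]: the intrinsic reward.
ri = tf.reduce_sum(tf.square(rand_encode_s_ - out), axis=1)

# Train only the predictor on the scalar mean, as in the commit.
train_op = tf.train.RMSPropOptimizer(0.001, name="predictor_opt").minimize(
    tf.reduce_mean(ri),
    var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "predictor"))

Keeping the target network out of var_list is what makes ri a novelty signal: the predictor's error shrinks only for states the agent has actually visited, so unfamiliar states keep yielding a larger intrinsic reward.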