diff --git a/rl_coach/architectures/tensorflow_components/heads/ppo_head.py b/rl_coach/architectures/tensorflow_components/heads/ppo_head.py
index 63f95a3ba..54a8049e5 100644
--- a/rl_coach/architectures/tensorflow_components/heads/ppo_head.py
+++ b/rl_coach/architectures/tensorflow_components/heads/ppo_head.py
@@ -110,8 +110,8 @@ def _build_discrete_net(self, input_layer, action_space):
         self.policy_mean = tf.nn.softmax(policy_values, name="policy")
 
         # define the distributions for the policy and the old policy
-        self.policy_distribution = tf.contrib.distributions.Categorical(probs=self.policy_mean)
-        self.old_policy_distribution = tf.contrib.distributions.Categorical(probs=self.old_policy_mean)
+        self.policy_distribution = tf.contrib.distributions.Categorical(probs=self.policy_mean + eps)
+        self.old_policy_distribution = tf.contrib.distributions.Categorical(probs=self.old_policy_mean + eps)
 
         self.output = self.policy_mean
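
For context (not part of the patch): the change guards against softmax outputs that saturate to an exact zero in float32, which makes `Categorical.log_prob` return `-inf` and propagates NaNs through the PPO likelihood ratio and KL terms. Below is a minimal NumPy sketch of the failure mode; it assumes `eps` is the small module-level constant the patch relies on, and the value 1e-15 used here is an assumption, not taken from the source.

```python
import numpy as np

# Assumed value; the actual eps constant defined in ppo_head.py may differ.
eps = 1e-15

# A saturated softmax output: action 1 carries exactly zero probability
# (this happens in float32 once one logit dominates the others enough).
policy_mean = np.array([1.0, 0.0, 0.0], dtype=np.float32)

# Without the epsilon, the log-probability of the zero-probability action
# is -inf, which then poisons the PPO ratio and KL-penalty computations.
with np.errstate(divide="ignore"):
    print(np.log(policy_mean[1]))       # -inf

# With the epsilon added, the log-probability stays finite.
print(np.log(policy_mean[1] + eps))     # about -34.5
```

The trade-off is that each distribution's probabilities become very slightly unnormalized (they sum to 1 + num_actions * eps), a negligible bias exchanged for numerical stability.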