Skip to content

Commit f117e5d

Browse files
authored
Merge pull request dennybritz#36 from alvarosg/bug-epsilons-total-t
BUG: Error getting epsilon when populating replay memory after resuming training
2 parents 0ee1db4 + 5b64a9c commit f117e5d

File tree

2 files changed

+4
-3
lines changed

2 files changed

+4
-3
lines changed

DQN/Deep Q Learning Solution.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,7 @@
387387
" state = state_processor.process(sess, state)\n",
388388
" state = np.stack([state] * 4, axis=2)\n",
389389
" for i in range(replay_memory_init_size):\n",
390-
" action_probs = policy(sess, state, epsilons[total_t])\n",
390+
" action_probs = policy(sess, state, epsilons[min(total_t, epsilon_decay_steps-1)])\n",
391391
" action = np.random.choice(np.arange(len(action_probs)), p=action_probs)\n",
392392
" next_state, reward, done, _ = env.step(VALID_ACTIONS[action])\n",
393393
" next_state = state_processor.process(sess, next_state)\n",

DQN/dqn.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ def deep_q_learning(sess,
278278
state = state_processor.process(sess, state)
279279
state = np.stack([state] * 4, axis=2)
280280
for i in range(replay_memory_init_size):
281-
action_probs = policy(sess, state, epsilons[total_t])
281+
action_probs = policy(sess, state, epsilons[min(total_t, epsilon_decay_steps-1)])
282282
action = np.random.choice(np.arange(len(action_probs)), p=action_probs)
283283
next_state, reward, done, _ = env.step(VALID_ACTIONS[action])
284284
next_state = state_processor.process(sess, next_state)
@@ -415,4 +415,5 @@ def deep_q_learning(sess,
415415
discount_factor=0.99,
416416
batch_size=32):
417417

418-
print("\nEpisode Reward: {}".format(stats.episode_rewards[-1]))
418+
print("\nEpisode Reward: {}".format(stats.episode_rewards[-1]))
419+

0 commit comments

Comments (0)