Skip to content

Commit f117e5d

Browse files
authored
Merge pull request dennybritz#36 from alvarosg/bug-epsilons-total-t
BUG: Error getting epsilon when populating replay memory after resuming training
2 parents 0ee1db4 + 5b64a9c commit f117e5d

File tree

2 files changed

+4
-3
lines changed

2 files changed

+4
-3
lines changed

DQN/Deep Q Learning Solution.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,7 @@
387387
" state = state_processor.process(sess, state)\n",
388388
" state = np.stack([state] * 4, axis=2)\n",
389389
" for i in range(replay_memory_init_size):\n",
390-
" action_probs = policy(sess, state, epsilons[total_t])\n",
390+
" action_probs = policy(sess, state, epsilons[min(total_t, epsilon_decay_steps-1)])\n",
391391
" action = np.random.choice(np.arange(len(action_probs)), p=action_probs)\n",
392392
" next_state, reward, done, _ = env.step(VALID_ACTIONS[action])\n",
393393
" next_state = state_processor.process(sess, next_state)\n",

DQN/dqn.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ def deep_q_learning(sess,
278278
state = state_processor.process(sess, state)
279279
state = np.stack([state] * 4, axis=2)
280280
for i in range(replay_memory_init_size):
281-
action_probs = policy(sess, state, epsilons[total_t])
281+
action_probs = policy(sess, state, epsilons[min(total_t, epsilon_decay_steps-1)])
282282
action = np.random.choice(np.arange(len(action_probs)), p=action_probs)
283283
next_state, reward, done, _ = env.step(VALID_ACTIONS[action])
284284
next_state = state_processor.process(sess, next_state)
@@ -415,4 +415,5 @@ def deep_q_learning(sess,
415415
discount_factor=0.99,
416416
batch_size=32):
417417

418-
print("\nEpisode Reward: {}".format(stats.episode_rewards[-1]))
418+
print("\nEpisode Reward: {}".format(stats.episode_rewards[-1]))
419+

0 commit comments

Comments (0)