@@ -225,7 +225,7 @@ def deep_q_learning(sess,
     Transition = namedtuple("Transition", ["state", "action", "reward", "next_state", "done"])

     # The replay memory
-    replay_memory = deque(maxlen=replay_memory_size)
+    replay_memory = list()

     # Keeps track of useful statistics
     stats = plotting.EpisodeStats(
@@ -318,6 +318,9 @@ def deep_q_learning(sess,
             # Save transition to replay memory
             replay_memory.append(Transition(state, action, reward, next_state, done))

+            if len(replay_memory) > replay_memory_size:
+                replay_memory.pop(0)  # evict the oldest transition
+
             # Update statistics
             stats.episode_rewards[i_episode] += reward
             stats.episode_lengths[i_episode] = t
@@ -374,7 +377,7 @@ def deep_q_learning(sess,
                     target_estimator=target_estimator,
                     experiment_dir=experiment_dir,
                     num_episodes=50000,
-                    replay_memory_size=500000,
+                    replay_memory_size=1000000,
                     replay_memory_init_size=50000,
                     update_target_estimator_every=10000,
                     epsilon_start=1.0,
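The diff swaps the bounded `deque(maxlen=replay_memory_size)` for a plain list with manual eviction, presumably because experience replay samples the buffer by index constantly, and indexing a list is O(1) while indexing a deque away from its ends is O(n). Since the new buffer is a list, the eviction call must be `pop(0)` rather than the deque-only `popleft()`. Below is a minimal, self-contained sketch of this pattern under that assumption; the `store` and `sample_batch` helpers and the dummy usage at the end are illustrative and not part of the commit.

```python
import random
from collections import namedtuple

Transition = namedtuple("Transition", ["state", "action", "reward", "next_state", "done"])

replay_memory_size = 1000000  # capacity, matching the new value in the diff
replay_memory = list()

def store(transition):
    """Append a transition; evict the oldest one once the buffer exceeds capacity."""
    replay_memory.append(transition)
    if len(replay_memory) > replay_memory_size:
        replay_memory.pop(0)  # lists support pop(0), not popleft()

def sample_batch(batch_size):
    """Draw a uniform random minibatch for the Q-learning update."""
    return random.sample(replay_memory, batch_size)

# Illustrative usage with dummy values
store(Transition(state=0, action=1, reward=0.0, next_state=1, done=False))
print(sample_batch(1))
```

Note the trade-off being made: `list.pop(0)` shifts every remaining element and is O(n), whereas `deque.popleft()` is O(1), but eviction happens once per stored transition while sampling touches many random indices per training step, so the list is likely the better fit here.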