Skip to content

Commit 71223d5

Browse files
committed
DQN copy_model_parameters memory leak fixed, tensorboard summaries updated with cpu/mem usage
1 parent 67e98a9 commit 71223d5

File tree

1 file changed

+70
-50
lines changed

1 file changed

+70
-50
lines changed

DQN/Deep Q Learning Solution.ipynb

Lines changed: 70 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
"import os\n",
1818
"import random\n",
1919
"import sys\n",
20+
"import psutil\n",
2021
"import tensorflow as tf\n",
2122
"\n",
2223
"if \"../\" not in sys.path:\n",
@@ -29,9 +30,7 @@
2930
{
3031
"cell_type": "code",
3132
"execution_count": null,
32-
"metadata": {
33-
"collapsed": false
34-
},
33+
"metadata": {},
3534
"outputs": [],
3635
"source": [
3736
"env = gym.envs.make(\"Breakout-v0\")"
@@ -40,9 +39,7 @@
4039
{
4140
"cell_type": "code",
4241
"execution_count": null,
43-
"metadata": {
44-
"collapsed": false
45-
},
42+
"metadata": {},
4643
"outputs": [],
4744
"source": [
4845
"# Atari Actions: 0 (noop), 1 (fire), 2 (left) and 3 (right) are valid actions\n",
@@ -86,9 +83,7 @@
8683
{
8784
"cell_type": "code",
8885
"execution_count": null,
89-
"metadata": {
90-
"collapsed": false
91-
},
86+
"metadata": {},
9287
"outputs": [],
9388
"source": [
9489
"class Estimator():\n",
@@ -198,9 +193,7 @@
198193
{
199194
"cell_type": "code",
200195
"execution_count": null,
201-
"metadata": {
202-
"collapsed": false
203-
},
196+
"metadata": {},
204197
"outputs": [],
205198
"source": [
206199
"# For Testing....\n",
@@ -234,30 +227,39 @@
234227
"cell_type": "code",
235228
"execution_count": null,
236229
"metadata": {
237-
"collapsed": false
230+
"collapsed": true
238231
},
239232
"outputs": [],
240233
"source": [
241-
"def copy_model_parameters(sess, estimator1, estimator2):\n",
234+
"class ModelParametersCopier():\n",
242235
" \"\"\"\n",
243-
" Copies the model parameters of one estimator to another.\n",
244-
"\n",
245-
" Args:\n",
246-
" sess: Tensorflow session instance\n",
247-
" estimator1: Estimator to copy the paramters from\n",
248-
" estimator2: Estimator to copy the parameters to\n",
236+
" Copy model parameters of one estimator to another.\n",
249237
" \"\"\"\n",
250-
" e1_params = [t for t in tf.trainable_variables() if t.name.startswith(estimator1.scope)]\n",
251-
" e1_params = sorted(e1_params, key=lambda v: v.name)\n",
252-
" e2_params = [t for t in tf.trainable_variables() if t.name.startswith(estimator2.scope)]\n",
253-
" e2_params = sorted(e2_params, key=lambda v: v.name)\n",
254-
"\n",
255-
" update_ops = []\n",
256-
" for e1_v, e2_v in zip(e1_params, e2_params):\n",
257-
" op = e2_v.assign(e1_v)\n",
258-
" update_ops.append(op)\n",
259-
"\n",
260-
" sess.run(update_ops)"
238+
" \n",
239+
" def __init__(self, estimator1, estimator2):\n",
240+
" \"\"\"\n",
241+
" Defines copy-work operation graph. \n",
242+
" Args:\n",
243+
" estimator1: Estimator to copy the paramters from\n",
244+
" estimator2: Estimator to copy the parameters to\n",
245+
" \"\"\"\n",
246+
" e1_params = [t for t in tf.trainable_variables() if t.name.startswith(estimator1.scope)]\n",
247+
" e1_params = sorted(e1_params, key=lambda v: v.name)\n",
248+
" e2_params = [t for t in tf.trainable_variables() if t.name.startswith(estimator2.scope)]\n",
249+
" e2_params = sorted(e2_params, key=lambda v: v.name)\n",
250+
"\n",
251+
" self.update_ops = []\n",
252+
" for e1_v, e2_v in zip(e1_params, e2_params):\n",
253+
" op = e2_v.assign(e1_v)\n",
254+
" self.update_ops.append(op)\n",
255+
" \n",
256+
" def make(self, sess):\n",
257+
" \"\"\"\n",
258+
" Makes copy.\n",
259+
" Args:\n",
260+
" sess: Tensorflow session instance\n",
261+
" \"\"\"\n",
262+
" sess.run(self.update_ops)"
261263
]
262264
},
263265
{
@@ -293,9 +295,7 @@
293295
{
294296
"cell_type": "code",
295297
"execution_count": null,
296-
"metadata": {
297-
"collapsed": false
298-
},
298+
"metadata": {},
299299
"outputs": [],
300300
"source": [
301301
"def deep_q_learning(sess,\n",
@@ -347,11 +347,17 @@
347347
"\n",
348348
" # The replay memory\n",
349349
" replay_memory = []\n",
350+
" \n",
351+
" # Make model copier object\n",
352+
" estimator_copy = ModelParametersCopier(q_estimator, target_estimator)\n",
350353
"\n",
351354
" # Keeps track of useful statistics\n",
352355
" stats = plotting.EpisodeStats(\n",
353356
" episode_lengths=np.zeros(num_episodes),\n",
354357
" episode_rewards=np.zeros(num_episodes))\n",
358+
" \n",
359+
" # For 'system/' summaries, usefull to check if currrent process looks healthy\n",
360+
" current_process = psutil.Process()\n",
355361
"\n",
356362
" # Create directories for checkpoints and summaries\n",
357363
" checkpoint_dir = os.path.join(experiment_dir, \"checkpoints\")\n",
@@ -422,14 +428,9 @@
422428
" # Epsilon for this time step\n",
423429
" epsilon = epsilons[min(total_t, epsilon_decay_steps-1)]\n",
424430
"\n",
425-
" # Add epsilon to Tensorboard\n",
426-
" episode_summary = tf.Summary()\n",
427-
" episode_summary.value.add(simple_value=epsilon, tag=\"epsilon\")\n",
428-
" q_estimator.summary_writer.add_summary(episode_summary, total_t)\n",
429-
"\n",
430431
" # Maybe update the target estimator\n",
431432
" if total_t % update_target_estimator_every == 0:\n",
432-
" copy_model_parameters(sess, q_estimator, target_estimator)\n",
433+
" estimator_copy.make(sess)\n",
433434
" print(\"\\nCopied model parameters to target network.\")\n",
434435
"\n",
435436
" # Print out which step we're on, useful for debugging.\n",
@@ -475,11 +476,14 @@
475476
"\n",
476477
" # Add summaries to tensorboard\n",
477478
" episode_summary = tf.Summary()\n",
478-
" episode_summary.value.add(simple_value=stats.episode_rewards[i_episode], node_name=\"episode_reward\", tag=\"episode_reward\")\n",
479-
" episode_summary.value.add(simple_value=stats.episode_lengths[i_episode], node_name=\"episode_length\", tag=\"episode_length\")\n",
480-
" q_estimator.summary_writer.add_summary(episode_summary, total_t)\n",
479+
" episode_summary.value.add(simple_value=epsilon, tag=\"episode/epsilon\")\n",
480+
" episode_summary.value.add(simple_value=stats.episode_rewards[i_episode], tag=\"episode/reward\")\n",
481+
" episode_summary.value.add(simple_value=stats.episode_lengths[i_episode], tag=\"episode/length\")\n",
482+
" episode_summary.value.add(simple_value=current_process.cpu_percent(), tag=\"system/cpu_usage_percent\")\n",
483+
" episode_summary.value.add(simple_value=current_process.memory_percent(memtype=\"vms\"), tag=\"system/v_memeory_usage_percent\")\n",
484+
" q_estimator.summary_writer.add_summary(episode_summary, i_episode)\n",
481485
" q_estimator.summary_writer.flush()\n",
482-
"\n",
486+
" \n",
483487
" yield total_t, plotting.EpisodeStats(\n",
484488
" episode_lengths=stats.episode_lengths[:i_episode+1],\n",
485489
" episode_rewards=stats.episode_rewards[:i_episode+1])\n",
@@ -490,9 +494,7 @@
490494
{
491495
"cell_type": "code",
492496
"execution_count": null,
493-
"metadata": {
494-
"collapsed": false
495-
},
497+
"metadata": {},
496498
"outputs": [],
497499
"source": [
498500
"tf.reset_default_graph()\n",
@@ -504,7 +506,7 @@
504506
"global_step = tf.Variable(0, name='global_step', trainable=False)\n",
505507
" \n",
506508
"# Create estimators\n",
507-
"q_estimator = Estimator(scope=\"q\", summaries_dir=experiment_dir)\n",
509+
"q_estimator = Estimator(scope=\"q_estimator\", summaries_dir=experiment_dir)\n",
508510
"target_estimator = Estimator(scope=\"target_q\")\n",
509511
"\n",
510512
"# State processor\n",
@@ -531,6 +533,24 @@
531533
"\n",
532534
" print(\"\\nEpisode Reward: {}\".format(stats.episode_rewards[-1]))"
533535
]
536+
},
537+
{
538+
"cell_type": "code",
539+
"execution_count": null,
540+
"metadata": {
541+
"collapsed": true
542+
},
543+
"outputs": [],
544+
"source": []
545+
},
546+
{
547+
"cell_type": "code",
548+
"execution_count": null,
549+
"metadata": {
550+
"collapsed": true
551+
},
552+
"outputs": [],
553+
"source": []
534554
}
535555
],
536556
"metadata": {
@@ -549,9 +569,9 @@
549569
"name": "python",
550570
"nbconvert_exporter": "python",
551571
"pygments_lexer": "ipython3",
552-
"version": "3.4.3"
572+
"version": "3.6.0"
553573
}
554574
},
555575
"nbformat": 4,
556-
"nbformat_minor": 0
576+
"nbformat_minor": 1
557577
}

0 commit comments

Comments
 (0)