|
17 | 17 | "import os\n", |
18 | 18 | "import random\n", |
19 | 19 | "import sys\n", |
| 20 | + "import psutil\n", |
20 | 21 | "import tensorflow as tf\n", |
21 | 22 | "\n", |
22 | 23 | "if \"../\" not in sys.path:\n", |
|
29 | 30 | { |
30 | 31 | "cell_type": "code", |
31 | 32 | "execution_count": null, |
32 | | - "metadata": { |
33 | | - "collapsed": false |
34 | | - }, |
| 33 | + "metadata": {}, |
35 | 34 | "outputs": [], |
36 | 35 | "source": [ |
37 | 36 | "env = gym.envs.make(\"Breakout-v0\")" |
|
40 | 39 | { |
41 | 40 | "cell_type": "code", |
42 | 41 | "execution_count": null, |
43 | | - "metadata": { |
44 | | - "collapsed": false |
45 | | - }, |
| 42 | + "metadata": {}, |
46 | 43 | "outputs": [], |
47 | 44 | "source": [ |
48 | 45 | "# Atari Actions: 0 (noop), 1 (fire), 2 (left) and 3 (right) are valid actions\n", |
|
86 | 83 | { |
87 | 84 | "cell_type": "code", |
88 | 85 | "execution_count": null, |
89 | | - "metadata": { |
90 | | - "collapsed": false |
91 | | - }, |
| 86 | + "metadata": {}, |
92 | 87 | "outputs": [], |
93 | 88 | "source": [ |
94 | 89 | "class Estimator():\n", |
|
198 | 193 | { |
199 | 194 | "cell_type": "code", |
200 | 195 | "execution_count": null, |
201 | | - "metadata": { |
202 | | - "collapsed": false |
203 | | - }, |
| 196 | + "metadata": {}, |
204 | 197 | "outputs": [], |
205 | 198 | "source": [ |
206 | 199 | "# For Testing....\n", |
|
234 | 227 | "cell_type": "code", |
235 | 228 | "execution_count": null, |
236 | 229 | "metadata": { |
237 | | - "collapsed": false |
| 230 | + "collapsed": true |
238 | 231 | }, |
239 | 232 | "outputs": [], |
240 | 233 | "source": [ |
241 | | - "def copy_model_parameters(sess, estimator1, estimator2):\n", |
| 234 | + "class ModelParametersCopier():\n", |
242 | 235 | " \"\"\"\n", |
243 | | - " Copies the model parameters of one estimator to another.\n", |
244 | | - "\n", |
245 | | - " Args:\n", |
246 | | - " sess: Tensorflow session instance\n", |
247 | | - " estimator1: Estimator to copy the paramters from\n", |
248 | | - " estimator2: Estimator to copy the parameters to\n", |
| 236 | + " Copy model parameters of one estimator to another.\n", |
249 | 237 | " \"\"\"\n", |
250 | | - " e1_params = [t for t in tf.trainable_variables() if t.name.startswith(estimator1.scope)]\n", |
251 | | - " e1_params = sorted(e1_params, key=lambda v: v.name)\n", |
252 | | - " e2_params = [t for t in tf.trainable_variables() if t.name.startswith(estimator2.scope)]\n", |
253 | | - " e2_params = sorted(e2_params, key=lambda v: v.name)\n", |
254 | | - "\n", |
255 | | - " update_ops = []\n", |
256 | | - " for e1_v, e2_v in zip(e1_params, e2_params):\n", |
257 | | - " op = e2_v.assign(e1_v)\n", |
258 | | - " update_ops.append(op)\n", |
259 | | - "\n", |
260 | | - " sess.run(update_ops)" |
| 238 | + " \n", |
| 239 | + " def __init__(self, estimator1, estimator2):\n", |
| 240 | + " \"\"\"\n", |
| 241 | +    "        Defines the copy-operation graph. \n", |
| 242 | + " Args:\n", |
| 243 | +    "          estimator1: Estimator to copy the parameters from\n", |
| 244 | + " estimator2: Estimator to copy the parameters to\n", |
| 245 | + " \"\"\"\n", |
| 246 | + " e1_params = [t for t in tf.trainable_variables() if t.name.startswith(estimator1.scope)]\n", |
| 247 | + " e1_params = sorted(e1_params, key=lambda v: v.name)\n", |
| 248 | + " e2_params = [t for t in tf.trainable_variables() if t.name.startswith(estimator2.scope)]\n", |
| 249 | + " e2_params = sorted(e2_params, key=lambda v: v.name)\n", |
| 250 | + "\n", |
| 251 | + " self.update_ops = []\n", |
| 252 | + " for e1_v, e2_v in zip(e1_params, e2_params):\n", |
| 253 | + " op = e2_v.assign(e1_v)\n", |
| 254 | + " self.update_ops.append(op)\n", |
| 255 | + " \n", |
| 256 | + " def make(self, sess):\n", |
| 257 | + " \"\"\"\n", |
| 258 | + " Makes copy.\n", |
| 259 | + " Args:\n", |
| 260 | + " sess: Tensorflow session instance\n", |
| 261 | + " \"\"\"\n", |
| 262 | + " sess.run(self.update_ops)" |
261 | 263 | ] |
262 | 264 | }, |
263 | 265 | { |
|
293 | 295 | { |
294 | 296 | "cell_type": "code", |
295 | 297 | "execution_count": null, |
296 | | - "metadata": { |
297 | | - "collapsed": false |
298 | | - }, |
| 298 | + "metadata": {}, |
299 | 299 | "outputs": [], |
300 | 300 | "source": [ |
301 | 301 | "def deep_q_learning(sess,\n", |
|
347 | 347 | "\n", |
348 | 348 | " # The replay memory\n", |
349 | 349 | " replay_memory = []\n", |
| 350 | + " \n", |
| 351 | + " # Make model copier object\n", |
| 352 | + " estimator_copy = ModelParametersCopier(q_estimator, target_estimator)\n", |
350 | 353 | "\n", |
351 | 354 | " # Keeps track of useful statistics\n", |
352 | 355 | " stats = plotting.EpisodeStats(\n", |
353 | 356 | " episode_lengths=np.zeros(num_episodes),\n", |
354 | 357 | " episode_rewards=np.zeros(num_episodes))\n", |
| 358 | + " \n", |
| 359 | +    "    # For 'system/' summaries, useful to check if current process looks healthy\n", |
| 360 | + " current_process = psutil.Process()\n", |
355 | 361 | "\n", |
356 | 362 | " # Create directories for checkpoints and summaries\n", |
357 | 363 | " checkpoint_dir = os.path.join(experiment_dir, \"checkpoints\")\n", |
|
422 | 428 | " # Epsilon for this time step\n", |
423 | 429 | " epsilon = epsilons[min(total_t, epsilon_decay_steps-1)]\n", |
424 | 430 | "\n", |
425 | | - " # Add epsilon to Tensorboard\n", |
426 | | - " episode_summary = tf.Summary()\n", |
427 | | - " episode_summary.value.add(simple_value=epsilon, tag=\"epsilon\")\n", |
428 | | - " q_estimator.summary_writer.add_summary(episode_summary, total_t)\n", |
429 | | - "\n", |
430 | 431 | " # Maybe update the target estimator\n", |
431 | 432 | " if total_t % update_target_estimator_every == 0:\n", |
432 | | - " copy_model_parameters(sess, q_estimator, target_estimator)\n", |
| 433 | + " estimator_copy.make(sess)\n", |
433 | 434 | " print(\"\\nCopied model parameters to target network.\")\n", |
434 | 435 | "\n", |
435 | 436 | " # Print out which step we're on, useful for debugging.\n", |
|
475 | 476 | "\n", |
476 | 477 | " # Add summaries to tensorboard\n", |
477 | 478 | " episode_summary = tf.Summary()\n", |
478 | | - " episode_summary.value.add(simple_value=stats.episode_rewards[i_episode], node_name=\"episode_reward\", tag=\"episode_reward\")\n", |
479 | | - " episode_summary.value.add(simple_value=stats.episode_lengths[i_episode], node_name=\"episode_length\", tag=\"episode_length\")\n", |
480 | | - " q_estimator.summary_writer.add_summary(episode_summary, total_t)\n", |
| 479 | + " episode_summary.value.add(simple_value=epsilon, tag=\"episode/epsilon\")\n", |
| 480 | + " episode_summary.value.add(simple_value=stats.episode_rewards[i_episode], tag=\"episode/reward\")\n", |
| 481 | + " episode_summary.value.add(simple_value=stats.episode_lengths[i_episode], tag=\"episode/length\")\n", |
| 482 | + " episode_summary.value.add(simple_value=current_process.cpu_percent(), tag=\"system/cpu_usage_percent\")\n", |
| 483 | + " episode_summary.value.add(simple_value=current_process.memory_percent(memtype=\"vms\"), tag=\"system/v_memeory_usage_percent\")\n", |
| 484 | + " q_estimator.summary_writer.add_summary(episode_summary, i_episode)\n", |
481 | 485 | " q_estimator.summary_writer.flush()\n", |
482 | | - "\n", |
| 486 | + " \n", |
483 | 487 | " yield total_t, plotting.EpisodeStats(\n", |
484 | 488 | " episode_lengths=stats.episode_lengths[:i_episode+1],\n", |
485 | 489 | " episode_rewards=stats.episode_rewards[:i_episode+1])\n", |
|
490 | 494 | { |
491 | 495 | "cell_type": "code", |
492 | 496 | "execution_count": null, |
493 | | - "metadata": { |
494 | | - "collapsed": false |
495 | | - }, |
| 497 | + "metadata": {}, |
496 | 498 | "outputs": [], |
497 | 499 | "source": [ |
498 | 500 | "tf.reset_default_graph()\n", |
|
504 | 506 | "global_step = tf.Variable(0, name='global_step', trainable=False)\n", |
505 | 507 | " \n", |
506 | 508 | "# Create estimators\n", |
507 | | - "q_estimator = Estimator(scope=\"q\", summaries_dir=experiment_dir)\n", |
| 509 | + "q_estimator = Estimator(scope=\"q_estimator\", summaries_dir=experiment_dir)\n", |
508 | 510 | "target_estimator = Estimator(scope=\"target_q\")\n", |
509 | 511 | "\n", |
510 | 512 | "# State processor\n", |
|
531 | 533 | "\n", |
532 | 534 | " print(\"\\nEpisode Reward: {}\".format(stats.episode_rewards[-1]))" |
533 | 535 | ] |
| 536 | + }, |
| 537 | + { |
| 538 | + "cell_type": "code", |
| 539 | + "execution_count": null, |
| 540 | + "metadata": { |
| 541 | + "collapsed": true |
| 542 | + }, |
| 543 | + "outputs": [], |
| 544 | + "source": [] |
| 545 | + }, |
| 546 | + { |
| 547 | + "cell_type": "code", |
| 548 | + "execution_count": null, |
| 549 | + "metadata": { |
| 550 | + "collapsed": true |
| 551 | + }, |
| 552 | + "outputs": [], |
| 553 | + "source": [] |
534 | 554 | } |
535 | 555 | ], |
536 | 556 | "metadata": { |
|
549 | 569 | "name": "python", |
550 | 570 | "nbconvert_exporter": "python", |
551 | 571 | "pygments_lexer": "ipython3", |
552 | | - "version": "3.4.3" |
| 572 | + "version": "3.6.0" |
553 | 573 | } |
554 | 574 | }, |
555 | 575 | "nbformat": 4, |
556 | | - "nbformat_minor": 0 |
| 576 | + "nbformat_minor": 1 |
557 | 577 | } |
0 commit comments