|
11 | 11 | "%matplotlib inline\n", |
12 | 12 | "\n", |
13 | 13 | "import gym\n", |
| 14 | + "from gym.wrappers import Monitor\n", |
14 | 15 | "import itertools\n", |
15 | 16 | "import numpy as np\n", |
16 | 17 | "import os\n", |
17 | 18 | "import random\n", |
18 | 19 | "import sys\n", |
19 | 20 | "import tensorflow as tf\n", |
20 | 21 | "\n", |
21 | | - "from distutils.version import StrictVersion\n", |
22 | | - "tf_version_info = tf.__version__.split(\"-\")[0] # 0.12.0-rc0 to 0.12.0,future release alpha 1.*.*\n", |
23 | | - "\n", |
24 | 22 | "if \"../\" not in sys.path:\n", |
25 | 23 | " sys.path.append(\"../\")\n", |
26 | 24 | "\n", |
|
69 | 67 | " self.input_state = tf.placeholder(shape=[210, 160, 3], dtype=tf.uint8)\n", |
70 | 68 | " self.output = tf.image.rgb_to_grayscale(self.input_state)\n", |
71 | 69 | " self.output = tf.image.crop_to_bounding_box(self.output, 34, 0, 160, 160)\n", |
72 | | - " \n", |
73 | | - " # tf.image.resize_images() function changed after tf version 0.11.0\n", |
74 | | - " if (StrictVersion(tf_version_info) >= StrictVersion('0.11.0')):\n", |
75 | | - " self.output = tf.image.resize_images(\n", |
76 | | - " self.output, [84, 84], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)\n", |
77 | | - " else:\n", |
78 | | - " self.output = tf.image.resize_images(\n", |
79 | | - " self.output, 84, 84, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)\n", |
| 70 | + " self.output = tf.image.resize_images(\n", |
| 71 | + " self.output, [84, 84], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)\n", |
80 | 72 | " self.output = tf.squeeze(self.output)\n", |
81 | | - " \n", |
| 73 | + "\n", |
82 | 74 | " def process(self, sess, state):\n", |
83 | 75 | " \"\"\"\n", |
84 | 76 | " Args:\n", |
|
116 | 108 | " summary_dir = os.path.join(summaries_dir, \"summaries_{}\".format(scope))\n", |
117 | 109 | " if not os.path.exists(summary_dir):\n", |
118 | 110 | " os.makedirs(summary_dir)\n", |
119 | | - " self.summary_writer = tf.train.SummaryWriter(summary_dir)\n", |
| 111 | + " self.summary_writer = tf.summary.FileWriter(summary_dir)\n", |
120 | 112 | "\n", |
121 | 113 | " def _build_model(self):\n", |
122 | 114 | " \"\"\"\n", |
|
160 | 152 | " self.train_op = self.optimizer.minimize(self.loss, global_step=tf.contrib.framework.get_global_step())\n", |
161 | 153 | "\n", |
162 | 154 | " # Summaries for Tensorboard\n", |
163 | | - " self.summaries = tf.merge_summary([\n", |
164 | | - " tf.scalar_summary(\"loss\", self.loss),\n", |
165 | | - " tf.histogram_summary(\"loss_hist\", self.losses),\n", |
166 | | - " tf.histogram_summary(\"q_values_hist\", self.predictions),\n", |
167 | | - " tf.scalar_summary(\"max_q_value\", tf.reduce_max(self.predictions))\n", |
| 155 | + " self.summaries = tf.summary.merge([\n", |
| 156 | + " tf.summary.scalar(\"loss\", self.loss),\n", |
| 157 | + " tf.summary.histogram(\"loss_hist\", self.losses),\n", |
| 158 | + " tf.summary.histogram(\"q_values_hist\", self.predictions),\n", |
| 159 | + " tf.summary.scalar(\"max_q_value\", tf.reduce_max(self.predictions))\n", |
168 | 160 | " ])\n", |
169 | 161 | "\n", |
170 | | - "\n", |
171 | 162 | " def predict(self, sess, s):\n", |
172 | 163 | " \"\"\"\n", |
173 | 164 | " Predicts action values.\n", |
|
221 | 212 | "sp = StateProcessor()\n", |
222 | 213 | "\n", |
223 | 214 | "with tf.Session() as sess:\n", |
224 | | - " sess.run(tf.initialize_all_variables())\n", |
| 215 | + " sess.run(tf.global_variables_initializer())\n", |
225 | 216 | " \n", |
226 | 217 | " # Example observation batch\n", |
227 | 218 | " observation = env.reset()\n", |
|
366 | 357 | " checkpoint_dir = os.path.join(experiment_dir, \"checkpoints\")\n", |
367 | 358 | " checkpoint_path = os.path.join(checkpoint_dir, \"model\")\n", |
368 | 359 | " monitor_path = os.path.join(experiment_dir, \"monitor\")\n", |
369 | | - "\n", |
| 360 | + " # Add env Monitor wrapper\n", |
| 361 | + " env = Monitor(env, directory=monitor_path, video_callable=lambda count: count % record_video_every == 0, resume=True)\n", |
| 362 | + "\n", |
370 | 363 | " if not os.path.exists(checkpoint_dir):\n", |
371 | 364 | " os.makedirs(checkpoint_dir)\n", |
372 | 365 | " if not os.path.exists(monitor_path):\n", |
|
409 | 402 | " else:\n", |
410 | 403 | " state = next_state\n", |
411 | 404 | "\n", |
412 | | - " # Record videos\n", |
413 | | - " env.monitor.start(monitor_path,\n", |
414 | | - " resume=True,\n", |
415 | | - " video_callable=lambda count: count % record_video_every == 0)\n", |
416 | 405 | "\n", |
417 | 406 | " for i_episode in range(num_episodes):\n", |
418 | 407 | "\n", |
|
493 | 482 | " episode_lengths=stats.episode_lengths[:i_episode+1],\n", |
494 | 483 | " episode_rewards=stats.episode_rewards[:i_episode+1])\n", |
495 | 484 | "\n", |
496 | | - " env.monitor.close()\n", |
497 | 485 | " return stats" |
498 | 486 | ] |
499 | 487 | }, |
|
522 | 510 | "\n", |
523 | 511 | "# Run it!\n", |
524 | 512 | "with tf.Session() as sess:\n", |
525 | | - " sess.run(tf.initialize_all_variables())\n", |
| 513 | + " sess.run(tf.global_variables_initializer())\n", |
526 | 514 | " for t, stats in deep_q_learning(sess,\n", |
527 | 515 | " env,\n", |
528 | 516 | " q_estimator=q_estimator,\n", |
|
0 commit comments