
Commit dee1e01

Alex committed:

DQN: Fixed typos. Changed lambda to gamma. Updated README
1 parent: f45bcbf

File tree: 5 files changed (+50, -66 lines)

DQN/Deep Q Learning Solution.ipynb

Lines changed: 22 additions & 10 deletions
@@ -30,7 +30,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "env = gym.envs.make(\"Breakout-v0\")"
@@ -39,7 +41,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "# Atari Actions: 0 (noop), 1 (fire), 2 (left) and 3 (right) are valid actions\n",
@@ -56,7 +60,7 @@
    "source": [
     "class StateProcessor():\n",
     "    \"\"\"\n",
-    "    Processes a raw Atari iamges. Resizes it and converts it to grayscale.\n",
+    "    Processes a raw Atari images. Resizes it and converts it to grayscale.\n",
     "    \"\"\"\n",
     "    def __init__(self):\n",
     "        # Build the Tensorflow graph\n",
@@ -83,7 +87,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "class Estimator():\n",
@@ -193,7 +199,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "# For Testing....\n",
@@ -295,7 +303,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "def deep_q_learning(sess,\n",
@@ -315,7 +325,7 @@
     "                    batch_size=32,\n",
     "                    record_video_every=50):\n",
     "    \"\"\"\n",
-    "    Q-Learning algorithm for fff-policy TD control using Function Approximation.\n",
+    "    Q-Learning algorithm for off-policy TD control using Function Approximation.\n",
     "    Finds the optimal greedy policy while following an epsilon-greedy policy.\n",
     "\n",
     "    Args:\n",
@@ -331,7 +341,7 @@
     "            the reply memory.\n",
     "        update_target_estimator_every: Copy parameters from the Q estimator to the \n",
     "            target estimator every N steps\n",
-    "        discount_factor: Lambda time discount factor\n",
+    "        discount_factor: Gamma discount factor\n",
     "        epsilon_start: Chance to sample a random action when taking an action.\n",
     "            Epsilon is decayed over time and this is the start value\n",
     "        epsilon_end: The final minimum value of epsilon after decaying is done\n",
@@ -494,7 +504,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "tf.reset_default_graph()\n",
@@ -569,7 +581,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.0"
+   "version": "3.5.2"
   }
  },
  "nbformat": 4,

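Context for the "Lambda time discount factor" → "Gamma discount factor" docstring fix: in Q-learning the discount factor is conventionally written gamma, while lambda usually denotes the eligibility-trace parameter in TD(λ). Below is a minimal, hypothetical sketch of where `discount_factor` enters the off-policy TD target the docstring describes; the function name and array arguments are illustrative assumptions, not code from this notebook.

```python
import numpy as np

def td_targets(rewards, done_flags, q_values_next, discount_factor=0.99):
    """Compute y_i = r_i + gamma * max_a Q_target(s'_i, a) for a replay minibatch.

    The bootstrap term is zeroed for terminal transitions. q_values_next is
    assumed to hold the target network's Q-values for the sampled next states.
    """
    rewards = np.asarray(rewards, dtype=np.float32)
    not_done = 1.0 - np.asarray(done_flags, dtype=np.float32)
    return rewards + not_done * discount_factor * np.max(q_values_next, axis=1)
```
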
DQN/Deep Q Learning.ipynb

Lines changed: 12 additions & 26 deletions
@@ -29,9 +29,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "env = gym.envs.make(\"Breakout-v0\")"
@@ -40,9 +38,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Atari Actions: 0 (noop), 1 (fire), 2 (left) and 3 (right) are valid actions\n",
@@ -59,7 +55,7 @@
    "source": [
     "class StateProcessor():\n",
     "    \"\"\"\n",
-    "    Processes a raw Atari iamges. Resizes it and converts it to grayscale.\n",
+    "    Processes a raw Atari images. Resizes it and converts it to grayscale.\n",
     "    \"\"\"\n",
     "    def __init__(self):\n",
     "        # Build the Tensorflow graph\n",
@@ -86,9 +82,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "class Estimator():\n",
@@ -199,9 +193,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# For Testing....\n",
@@ -234,9 +226,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def copy_model_parameters(sess, estimator1, estimator2):\n",
@@ -294,9 +284,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def deep_q_learning(sess,\n",
@@ -316,7 +304,7 @@
     "                    batch_size=32,\n",
     "                    record_video_every=50):\n",
     "    \"\"\"\n",
-    "    Q-Learning algorithm for fff-policy TD control using Function Approximation.\n",
+    "    Q-Learning algorithm for off-policy TD control using Function Approximation.\n",
     "    Finds the optimal greedy policy while following an epsilon-greedy policy.\n",
     "\n",
     "    Args:\n",
@@ -332,7 +320,7 @@
     "            the reply memory.\n",
     "        update_target_estimator_every: Copy parameters from the Q estimator to the \n",
     "            target estimator every N steps\n",
-    "        discount_factor: Lambda time discount factor\n",
+    "        discount_factor: Gamma discount factor\n",
     "        epsilon_start: Chance to sample a random action when taking an action.\n",
     "            Epsilon is decayed over time and this is the start value\n",
     "        epsilon_end: The final minimum value of epsilon after decaying is done\n",
@@ -469,9 +457,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "tf.reset_default_graph()\n",
@@ -528,9 +514,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.0"
+   "version": "3.5.2"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 0
+ "nbformat_minor": 1
 }

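The `update_target_estimator_every` parameter described in the docstring above depends on the `copy_model_parameters(sess, estimator1, estimator2)` helper that appears as context in this diff. A hedged sketch of what such a copy step typically looks like in TF 1.x graph code follows; the assumption that each `Estimator` tags its variables with a `scope` name prefix is mine, not something shown in this commit.

```python
import tensorflow as tf

def copy_model_parameters(sess, estimator1, estimator2):
    """Overwrite estimator2's (target network) variables with estimator1's (Q network)."""
    e1_params = sorted(
        [v for v in tf.trainable_variables() if v.name.startswith(estimator1.scope)],
        key=lambda v: v.name)
    e2_params = sorted(
        [v for v in tf.trainable_variables() if v.name.startswith(estimator2.scope)],
        key=lambda v: v.name)
    # Pair variables by sorted name and assign the Q estimator's values into the target.
    update_ops = [e2_v.assign(e1_v) for e1_v, e2_v in zip(e1_params, e2_params)]
    sess.run(update_ops)
```
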
DQN/Double DQN Solution.ipynb

Lines changed: 12 additions & 26 deletions
@@ -28,9 +28,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "env = gym.envs.make(\"Breakout-v0\")"
@@ -39,9 +37,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Atari Actions: 0 (noop), 1 (fire), 2 (left) and 3 (right) are valid actions\n",
@@ -58,7 +54,7 @@
    "source": [
     "class StateProcessor():\n",
     "    \"\"\"\n",
-    "    Processes a raw Atari iamges. Resizes it and converts it to grayscale.\n",
+    "    Processes a raw Atari images. Resizes it and converts it to grayscale.\n",
     "    \"\"\"\n",
     "    def __init__(self):\n",
     "        # Build the Tensorflow graph\n",
@@ -85,9 +81,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "class Estimator():\n",
@@ -175,9 +169,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# For Testing....\n",
@@ -210,9 +202,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def copy_model_parameters(sess, estimator1, estimator2):\n",
@@ -270,9 +260,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def deep_q_learning(sess,\n",
@@ -292,7 +280,7 @@
     "                    batch_size=32,\n",
     "                    record_video_every=50):\n",
     "    \"\"\"\n",
-    "    Q-Learning algorithm for fff-policy TD control using Function Approximation.\n",
+    "    Q-Learning algorithm for off-policy TD control using Function Approximation.\n",
     "    Finds the optimal greedy policy while following an epsilon-greedy policy.\n",
     "\n",
     "    Args:\n",
@@ -308,7 +296,7 @@
     "            the reply memory.\n",
     "        update_target_estimator_every: Copy parameters from the Q estimator to the \n",
     "            target estimator every N steps\n",
-    "        discount_factor: Lambda time discount factor\n",
+    "        discount_factor: Gamma discount factor\n",
     "        epsilon_start: Chance to sample a random action when taking an action.\n",
     "            Epsilon is decayed over time and this is the start value\n",
     "        epsilon_end: The final minimum value of epsilon after decaying is done\n",
@@ -472,9 +460,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "tf.reset_default_graph()\n",
@@ -531,9 +517,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.5.1"
+   "version": "3.5.2"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 0
+ "nbformat_minor": 1
 }

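Since this notebook is the Double DQN solution, a brief sketch of how its target differs from the vanilla DQN target shown earlier: the online Q-network selects the greedy next action and the target network evaluates it, which reduces the overestimation bias of taking a single max. The variable names below are illustrative assumptions, not the notebook's.

```python
import numpy as np

def double_dqn_targets(rewards, done_flags, q_values_next, q_values_next_target,
                       discount_factor=0.99):
    """y_i = r_i + gamma * Q_target(s'_i, argmax_a Q_online(s'_i, a))."""
    rewards = np.asarray(rewards, dtype=np.float32)
    not_done = 1.0 - np.asarray(done_flags, dtype=np.float32)
    best_actions = np.argmax(q_values_next, axis=1)            # action selection: online net
    batch_idx = np.arange(len(best_actions))
    evaluated = q_values_next_target[batch_idx, best_actions]  # action evaluation: target net
    return rewards + not_done * discount_factor * evaluated
```
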
DQN/README.md

Lines changed: 1 addition & 1 deletion
@@ -39,7 +39,7 @@
 
 ### Exercises
 
-- [OpenAI Gym Atari Environment Playground](Breakout%20Playground.ipynb)
+- Get familiar with the [OpenAI Gym Atari Environment Playground](Breakout%20Playground.ipynb)
 - Deep-Q Learning for Atari Games
   - [Exercise](Deep%20Q%20Learning.ipynb)
   - [Solution](Deep%20Q%20Learning%20Solution.ipynb)

DQN/dqn.py

Lines changed: 3 additions & 3 deletions
@@ -20,7 +20,7 @@
 
 class StateProcessor():
     """
-    Processes a raw Atari iamges. Resizes it and converts it to grayscale.
+    Processes a raw Atari images. Resizes it and converts it to grayscale.
     """
     def __init__(self):
         # Build the Tensorflow graph
@@ -208,7 +208,7 @@ def deep_q_learning(sess,
                     batch_size=32,
                     record_video_every=50):
     """
-    Q-Learning algorithm for fff-policy TD control using Function Approximation.
+    Q-Learning algorithm for off-policy TD control using Function Approximation.
     Finds the optimal greedy policy while following an epsilon-greedy policy.
 
     Args:
@@ -224,7 +224,7 @@ def deep_q_learning(sess,
             the reply memory.
         update_target_estimator_every: Copy parameters from the Q estimator to the
             target estimator every N steps
-        discount_factor: Lambda time discount factor
+        discount_factor: Gamma discount factor
         epsilon_start: Chance to sample a random action when taking an action.
             Epsilon is decayed over time and this is the start value
         epsilon_end: The final minimum value of epsilon after decaying is done

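The `iamges` → `images` typo sits in the `StateProcessor` docstring of `dqn.py`. For orientation, here is a hedged sketch of what a preprocessing class matching that docstring usually looks like in TF 1.x graph style; the 210x160 input shape, the crop offsets, and the 84x84 output size are common Atari-DQN choices assumed for illustration, not values read from this diff.

```python
import tensorflow as tf  # TF 1.x-style graph code, matching the era of this file

class StateProcessor():
    """
    Processes a raw Atari image: resizes it and converts it to grayscale.
    """
    def __init__(self):
        # Build the Tensorflow graph
        with tf.variable_scope("state_processor"):
            self.input_state = tf.placeholder(shape=[210, 160, 3], dtype=tf.uint8)
            self.output = tf.image.rgb_to_grayscale(self.input_state)
            self.output = tf.image.crop_to_bounding_box(self.output, 34, 0, 160, 160)
            self.output = tf.image.resize_images(
                self.output, [84, 84], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
            self.output = tf.squeeze(self.output)

    def process(self, sess, state):
        # Returns one processed [84, 84] grayscale frame for one raw RGB frame.
        return sess.run(self.output, {self.input_state: state})
```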