Updated Readme. Changed Lambda to Gamma

Alex · Alex · commit 3fce6b57c93a · 2017-12-01T15:38:01.000+09:00
diff --git a/FA/Q-Learning with Value Function Approximation Solution.ipynb b/FA/Q-Learning with Value Function Approximation Solution.ipynb
@@ -3,9 +3,7 @@
   {
    "cell_type": "code",
    "execution_count": 1,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "%matplotlib inline\n",
@@ -31,9 +29,7 @@
   {
    "cell_type": "code",
    "execution_count": 2,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stderr",
@@ -50,9 +46,7 @@
   {
    "cell_type": "code",
    "execution_count": 3,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -74,7 +68,7 @@
     "scaler = sklearn.preprocessing.StandardScaler()\n",
     "scaler.fit(observation_examples)\n",
     "\n",
-    "# Used to converte a state to a featurizes represenation.\n",
+    "# Used to convert a state to a featurized representation.\n",
     "# We use RBF kernels with different variances to cover different parts of the space\n",
     "featurizer = sklearn.pipeline.FeatureUnion([\n",
     "        (\"rbf1\", RBFSampler(gamma=5.0, n_components=100)),\n",
@@ -88,9 +82,7 @@
   {
    "cell_type": "code",
    "execution_count": 4,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "class Estimator():\n",
@@ -151,9 +143,7 @@
   {
    "cell_type": "code",
    "execution_count": 5,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def make_epsilon_greedy_policy(estimator, epsilon, nA):\n",
@@ -182,9 +172,7 @@
   {
    "cell_type": "code",
    "execution_count": 14,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def q_learning(env, estimator, num_episodes, discount_factor=1.0, epsilon=0.1, epsilon_decay=1.0):\n",
@@ -196,7 +184,7 @@
     "        env: OpenAI environment.\n",
     "        estimator: Action-Value function estimator\n",
     "        num_episodes: Number of episodes to run for.\n",
-    "        discount_factor: Lambda time discount factor.\n",
+    "        discount_factor: Gamma discount factor.\n",
     "        epsilon: Chance the sample a random action. Float betwen 0 and 1.\n",
     "        epsilon_decay: Each episode, epsilon is decayed by this factor\n",
     "    \n",
@@ -283,9 +271,7 @@
   {
    "cell_type": "code",
    "execution_count": 16,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -305,9 +291,7 @@
   {
    "cell_type": "code",
    "execution_count": 17,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -384,9 +368,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.5.1"
+   "version": "3.5.2"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 0
+ "nbformat_minor": 1
 }
diff --git a/FA/Q-Learning with Value Function Approximation.ipynb b/FA/Q-Learning with Value Function Approximation.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "code",
    "execution_count": 1,
    "metadata": {
-    "collapsed": false
+    "collapsed": true
    },
    "outputs": [],
    "source": [
@@ -31,9 +31,7 @@
   {
    "cell_type": "code",
    "execution_count": 2,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stderr",
@@ -50,9 +48,7 @@
   {
    "cell_type": "code",
    "execution_count": 3,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -89,7 +85,7 @@
    "cell_type": "code",
    "execution_count": 4,
    "metadata": {
-    "collapsed": false
+    "collapsed": true
    },
    "outputs": [],
    "source": [
@@ -149,7 +145,7 @@
    "cell_type": "code",
    "execution_count": 5,
    "metadata": {
-    "collapsed": false
+    "collapsed": true
    },
    "outputs": [],
    "source": [
@@ -180,7 +176,7 @@
    "cell_type": "code",
    "execution_count": 18,
    "metadata": {
-    "collapsed": false
+    "collapsed": true
    },
    "outputs": [],
    "source": [
@@ -193,7 +189,7 @@
     "        env: OpenAI environment.\n",
     "        estimator: Action-Value function estimator\n",
     "        num_episodes: Number of episodes to run for.\n",
-    "        discount_factor: Lambda time discount factor.\n",
+    "        discount_factor: Gamma discount factor.\n",
     "        epsilon: Chance the sample a random action. Float betwen 0 and 1.\n",
     "        epsilon_decay: Each episode, epsilon is decayed by this factor\n",
     "    \n",
@@ -237,9 +233,7 @@
   {
    "cell_type": "code",
    "execution_count": 20,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -259,9 +253,7 @@
   {
    "cell_type": "code",
    "execution_count": 21,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -326,9 +318,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.4.3"
+   "version": "3.5.2"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 0
+ "nbformat_minor": 1
 }
diff --git a/FA/README.md b/FA/README.md
@@ -35,6 +35,8 @@
 
 ### Exercises
 
+- Get familiar with the [Mountain Car Playground](MountainCar%20Playground.ipynb)
+
 - Solve Mountain Car Problem using Q-Learning with Linear Function Approximation
   - [Exercise](Q-Learning%20with%20Value%20Function%20Approximation.ipynb)
   - [Solution](Q-Learning%20with%20Value%20Function%20Approximation%20Solution.ipynb)