
Commit ad42cb2

Add learning curves for passive-adp agent
1 parent 9625052 commit ad42cb2

File tree

3 files changed: +92 −24 lines changed


notebooks/ReinforcementLearning.ipynb

Lines changed: 92 additions & 24 deletions
@@ -162,21 +162,23 @@
 },
 {
 "cell_type": "code",
- "execution_count": 22,
+ "execution_count": 44,
 "metadata": {},
 "outputs": [
 {
 "name": "stdout",
 "output_type": "stream",
 "text": [
- "[1,1] \t:\t0.7128593117885544\n",
- "[1,2] \t:\t0.7680398391451688\n",
- "[1,3] \t:\t0.8178806550835265\n",
- "[2,1] \t:\t0.6628583416987663\n",
- "[2,3] \t:\t0.8746799974574001\n",
+ "Cell \t:\tExpected Utility\n",
+ "-------------------------------------\n",
+ "[1,1] \t:\t0.7153193259024824\n",
+ "[1,2] \t:\t0.7707398421463386\n",
+ "[1,3] \t:\t0.8203828048081079\n",
+ "[2,1] \t:\t0.6670047267920397\n",
+ "[2,3] \t:\t0.8762199960076309\n",
 "[3,1] \t:\tnull\n",
- "[3,2] \t:\t0.6938189410949245\n",
- "[3,3] \t:\t0.9241799994408929\n",
+ "[3,2] \t:\t0.7344940650463005\n",
+ "[3,3] \t:\t0.9266999990710265\n",
 "[4,1] \t:\tnull\n",
 "[4,2] \t:\t-1.0\n",
 "[4,3] \t:\t1.0\n"
@@ -188,7 +190,7 @@
 "null"
 ]
 },
- "execution_count": 22,
+ "execution_count": 44,
 "metadata": {},
 "output_type": "execute_result"
 }
@@ -228,7 +230,8 @@
 "cwe.addAgent(padpa);\n",
 "padpa.reset();\n",
 "cwe.executeTrials(2000);\n",
- "\n",
+ "System.out.println(\"Cell\" + \" \\t:\\t\" + \"Expected Utility\");\n",
+ "System.out.println(\"-------------------------------------\");\n",
 "Map<Cell<Double>, Double> U = padpa.getUtility();\n",
 "for(int i = 1; i<=4; i++){\n",
 " for(int j = 1; j<=3; j++){\n",
@@ -254,20 +257,9 @@
 },
 {
 "cell_type": "code",
- "execution_count": 25,
+ "execution_count": 42,
 "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "null"
- ]
- },
- "execution_count": 25,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
 "source": [
 "import aima.core.environment.cellworld.*;\n",
 "import aima.core.learning.reinforcement.agent.PassiveADPAgent;\n",
@@ -318,7 +310,83 @@
 " }\n",
 " }\n",
 " runs.put(r, trials);\n",
- "}"
+ "}\n",
+ "\n",
+ "def T = [];\n",
+ "def v4_3 = [];\n",
+ "def v3_3 = [];\n",
+ "def v1_3 = [];\n",
+ "def v1_1 = [];\n",
+ "def v3_2 = [];\n",
+ "def v2_1 = [];\n",
+ "double tmp = 0.0;\n",
+ "for (int t = 0; t < (numTrialsPerRun / reportEveryN); t++) {\n",
+ " T.add(t);\n",
+ " Map<Cell<Double>, Double> u = runs.get(numRuns - 1).get(t);\n",
+ " tmp = (u.containsKey(cw.getCellAt(4, 3)) ? u.get(cw.getCellAt(4, 3)) : 0.0);\n",
+ " v4_3.add(tmp);\n",
+ " tmp = (u.containsKey(cw.getCellAt(3, 3)) ? u.get(cw.getCellAt(3, 3)) : 0.0);\n",
+ " v3_3.add(tmp);\n",
+ " tmp = (u.containsKey(cw.getCellAt(1, 3)) ? u.get(cw.getCellAt(1, 3)) : 0.0);\n",
+ " v1_3.add(tmp);\n",
+ " tmp = (u.containsKey(cw.getCellAt(1, 1)) ? u.get(cw.getCellAt(1, 1)) : 0.0);\n",
+ " v1_1.add(tmp);\n",
+ " tmp = (u.containsKey(cw.getCellAt(3, 2)) ? u.get(cw.getCellAt(3, 2)) : 0.0);\n",
+ " v3_2.add(tmp);\n",
+ " tmp = (u.containsKey(cw.getCellAt(2, 1)) ? u.get(cw.getCellAt(2, 1)) : 0.0);\n",
+ " v2_1.add(tmp);\n",
+ "}\n",
+ "\n",
+ "def p1 = new Plot(title: \"Learning Curve\", yLabel: \"Utility estimates\", xLabel: \"Number of trials\");\n",
+ "p1 << new Line(x: T, y: v4_3, displayName: \"v4_3\")\n",
+ "p1 << new Line(x: T, y: v3_3, displayName: \"v3_3\")\n",
+ "p1 << new Line(x: T, y: v1_3, displayName: \"v1_3\")\n",
+ "p1 << new Line(x: T, y: v1_1, displayName: \"v1_1\")\n",
+ "p1 << new Line(x: T, y: v3_2, displayName: \"v3_2\")\n",
+ "p1 << new Line(x: T, y: v2_1, displayName: \"v2_1\")\n",
+ "\n",
+ "def trails = [];\n",
+ "def rmseValues = [];\n",
+ "for (int t = 0; t < rmseTrialsToReport; t++) {\n",
+ " trails.add(t);\n",
+ " double xSsquared = 0;\n",
+ " for (int r = 0; r < numRuns; r++) {\n",
+ " Map<Cell<Double>, Double> u = runs.get(r).get(t);\n",
+ " Double val1_1 = u.get(cw.getCellAt(1, 1));\n",
+ " xSsquared += Math.pow(0.705 - val1_1, 2);\n",
+ " }\n",
+ " double rmse = Math.sqrt(xSsquared / runs.size());\n",
+ " rmseValues.add(rmse);\n",
+ "}\n",
+ "def p2 = new Plot(yLabel: \"RMS error in utility\", xLabel: \"Number of trials\");\n",
+ "p2 << new Line(x: trails, y: rmseValues)\n",
+ "OutputCell.HIDDEN"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[![Utility estimates][1]][1]\n",
+ "\n",
+ "[1]: assets/reinforcement_learning/utility_estimates.png"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[![RMS error in utility][1]][1]\n",
+ "\n",
+ "[1]: assets/reinforcement_learning/RMSerror.png"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "* The first figure shows the utility estimates for some of the states as a function of the number of trials. Notice the large changes occurring around the 63rd trial - this is the first time that the agent falls into the -1 terminal state at $[4,2]$. \n",
+ "* The second plot shows the root-mean-square error in the estimate for $U(1,1)$, averaged over 20 runs of 100 trials each."
+ ]
+ },
 ]
 },
 {
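As background for the two plots above: the passive ADP agent estimates utilities by solving the policy-evaluation equations of its learned model for the fixed policy $\pi$ (AIMA, Section 21.2),

$$U^{\pi}(s) = R(s) + \gamma \sum_{s'} P(s' \mid s, \pi(s)) \, U^{\pi}(s'),$$

so each learning curve is a sequence of snapshots of that estimate for one state. The RMS-error loop in the added cell evaluates, at each checkpoint $t$,

$$\mathrm{RMSE}(t) = \sqrt{\frac{1}{N} \sum_{r=1}^{N} \left( \hat{U}^{(t)}_{r}(1,1) - 0.705 \right)^{2}},$$

where $N$ is the number of runs (20) and $0.705$ is the exact utility of $[1,1]$ in the $4 \times 3$ world, the reference constant used in the loop.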
assets/reinforcement_learning/RMSerror.png and assets/reinforcement_learning/utility_estimates.png (binary image files, 37 KB and 56.4 KB)
