Created using Colaboratory

ZacharySBrown · ZacharySBrown · commit d45177deaf71 · 2021-02-15T14:26:05.000-05:00
diff --git a/examples/4a_averages_and_ses.ipynb b/examples/4a_averages_and_ses.ipynb
@@ -6,7 +6,7 @@
       "name": "4a_averages_and_ses.ipynb",
       "provenance": [],
       "collapsed_sections": [],
-      "authorship_tag": "ABX9TyP+mgtJZoqPKv7k/JkZPGz4",
+      "authorship_tag": "ABX9TyN0MCV9CSEvVqalgvPpkNxv",
       "include_colab_link": true
     },
     "kernelspec": {
@@ -44,7 +44,7 @@
           "base_uri": "https://localhost:8080/"
         },
         "id": "qyrssun2kgSi",
-        "outputId": "f07f1e2e-9055-42aa-c770-474996ab25a2"
+        "outputId": "15bf8f9f-666a-4cd4-ff0f-131f9e75b5dd"
       },
       "source": [
         "import pandas as pd\n",
@@ -69,29 +69,29 @@
         "\n",
         "from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt"
       ],
-      "execution_count": 156,
+      "execution_count": 162,
       "outputs": [
         {
           "output_type": "stream",
           "text": [
             "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
             "                                 Dload  Upload   Total   Spent    Left  Speed\n",
-            "100   477  100   477    0     0   3785      0 --:--:-- --:--:-- --:--:--  3785\n",
+            "100   477  100   477    0     0   3755      0 --:--:-- --:--:-- --:--:--  3726\n",
             "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
             "                                 Dload  Upload   Total   Spent    Left  Speed\n",
-            "100   629  100   629    0     0   4952      0 --:--:-- --:--:-- --:--:--  4992\n",
+            "100   629  100   629    0     0   4250      0 --:--:-- --:--:-- --:--:--  4278\n",
             "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
             "                                 Dload  Upload   Total   Spent    Left  Speed\n",
-            "100  3867  100  3867    0     0  37182      0 --:--:-- --:--:-- --:--:-- 37182\n",
+            "100  3867  100  3867    0     0  37911      0 --:--:-- --:--:-- --:--:-- 37911\n",
             "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
             "                                 Dload  Upload   Total   Spent    Left  Speed\n",
-            "100   456  100   456    0     0   3931      0 --:--:-- --:--:-- --:--:--  3931\n",
+            "100   456  100   456    0     0   5010      0 --:--:-- --:--:-- --:--:--  5010\n",
             "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
             "                                 Dload  Upload   Total   Spent    Left  Speed\n",
-            "100  1456  100  1456    0     0  13866      0 --:--:-- --:--:-- --:--:-- 13866\n",
+            "100  1456  100  1456    0     0  18200      0 --:--:-- --:--:-- --:--:-- 18200\n",
             "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
             "                                 Dload  Upload   Total   Spent    Left  Speed\n",
-            "100   151  100   151    0     0   1659      0 --:--:-- --:--:-- --:--:--  1659\n"
+            "100   151  100   151    0     0   1556      0 --:--:-- --:--:-- --:--:--  1556\n"
           ],
           "name": "stdout"
         }
@@ -116,24 +116,24 @@
           "height": 541
         },
         "id": "mbtL6K0tIPOM",
-        "outputId": "899b4e3a-aaab-48ec-e33f-9a24abe8d1e2"
+        "outputId": "877583d9-a0b9-4f39-ed54-2b965a792991"
       },
       "source": [
         "can_openers.plot()"
       ],
-      "execution_count": 5,
+      "execution_count": 163,
       "outputs": [
         {
           "output_type": "execute_result",
           "data": {
             "text/plain": [
-              "<matplotlib.axes._subplots.AxesSubplot at 0x7f0bb96716a0>"
+              "<matplotlib.axes._subplots.AxesSubplot at 0x7f0b9d3c1908>"
             ]
           },
           "metadata": {
             "tags": []
           },
-          "execution_count": 5
+          "execution_count": 163
         },
         {
           "output_type": "display_data",
@@ -170,16 +170,16 @@
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 228
+          "height": 0
         },
         "id": "Wy5VyH48klYK",
-        "outputId": "dc7cea7d-c76f-40bd-f6d2-c2984cfa812b"
+        "outputId": "6c9ac18a-7131-4aa1-cf8b-703d20a9cf30"
       },
       "source": [
         "can_openers['mean_forecast'] = can_openers.expanding().mean().shift(1)\n",
         "can_openers.head()"
       ],
-      "execution_count": 6,
+      "execution_count": 164,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -255,7 +255,7 @@
           "metadata": {
             "tags": []
           },
-          "execution_count": 6
+          "execution_count": 164
         }
       ]
     },
@@ -264,10 +264,10 @@
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 541
+          "height": 0
         },
         "id": "fsK3fqIIktfJ",
-        "outputId": "aba0cb1d-d729-4fd1-8360-41b7dc506d92"
+        "outputId": "23f58cb3-4d64-4f08-b6e2-d73c29c09d7f"
       },
       "source": [
         "can_openers['mean_forecast'].plot()\n",
@@ -279,19 +279,19 @@
         "can_openers['mean_value'].plot(color='red', ax=ax)\n",
         "plt.legend()"
       ],
-      "execution_count": 9,
+      "execution_count": 165,
       "outputs": [
         {
           "output_type": "execute_result",
           "data": {
             "text/plain": [
-              "<matplotlib.legend.Legend at 0x7f0bb937d940>"
+              "<matplotlib.legend.Legend at 0x7f0b9ba1a2e8>"
             ]
           },
           "metadata": {
             "tags": []
           },
-          "execution_count": 9
+          "execution_count": 165
         },
         {
           "output_type": "display_data",
@@ -339,10 +339,10 @@
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 378
+          "height": 0
         },
         "id": "xWRta9Dzk-Tg",
-        "outputId": "4fedca51-cbb5-4c9f-cf89-f13441457292"
+        "outputId": "734d7aa2-f1c8-4c66-a3ab-656b77b82260"
       },
       "source": [
         "can_openers = pd.read_csv('can_openers.csv', parse_dates=['month']).set_index('month')\n",
@@ -352,7 +352,7 @@
         "can_openers['MA5_forecast'] = can_openers['shipments'].rolling(5, center=True).mean().shift(3)\n",
         "can_openers.head(10)"
       ],
-      "execution_count": 44,
+      "execution_count": 166,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -470,7 +470,7 @@
           "metadata": {
             "tags": []
           },
-          "execution_count": 44
+          "execution_count": 166
         }
       ]
     },
@@ -479,10 +479,10 @@
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 541
+          "height": 0
         },
         "id": "5Rnf3YpLLtt-",
-        "outputId": "edfe6411-1da6-4c26-9583-23a53d7a47b9"
+        "outputId": "affb0ae3-c083-4978-bde7-9c85bae0dc4e"
       },
       "source": [
         "can_openers['MA3_forecast'].plot()\n",
@@ -492,19 +492,19 @@
         "\n",
         "plt.legend()"
       ],
-      "execution_count": 45,
+      "execution_count": 167,
       "outputs": [
         {
           "output_type": "execute_result",
           "data": {
             "text/plain": [
-              "<matplotlib.legend.Legend at 0x7f0bb11f36a0>"
+              "<matplotlib.legend.Legend at 0x7f0b9b9d2748>"
             ]
           },
           "metadata": {
             "tags": []
           },
-          "execution_count": 45
+          "execution_count": 167
         },
         {
           "output_type": "display_data",
@@ -550,7 +550,7 @@
           "height": 699
         },
         "id": "ZdpzoBlwcO6s",
-        "outputId": "cd35cf75-d706-4665-e675-54f2619d18a0"
+        "outputId": "f7b5cbce-5be3-402c-8e5b-eb752bf0dab0"
       },
       "source": [
         "# Reload a fresh copy of our data\n",
@@ -568,7 +568,7 @@
         "\n",
         "can_openers.plot()"
       ],
-      "execution_count": 160,
+      "execution_count": 168,
       "outputs": [
         {
           "output_type": "stream",
@@ -588,13 +588,13 @@
           "output_type": "execute_result",
           "data": {
             "text/plain": [
-              "<matplotlib.axes._subplots.AxesSubplot at 0x7f0b9bc4e6d8>"
+              "<matplotlib.axes._subplots.AxesSubplot at 0x7f0b9b90f780>"
             ]
           },
           "metadata": {
             "tags": []
           },
-          "execution_count": 160
+          "execution_count": 168
         },
         {
           "output_type": "display_data",
@@ -611,6 +611,77 @@
         }
       ]
     },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Hh5S264iJFG5",
+        "outputId": "6098d250-7255-4e1e-946e-b016b913fa74"
+      },
+      "source": [
+        "can_openers.shape"
+      ],
+      "execution_count": 169,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "(12, 4)"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 169
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "NY8QCfKkIoIM"
+      },
+      "source": [
+        "We can calculate the mean squared error (MSE) to measure how well each of the smoothed fits matches the data. Remember that we'll want to use a **test set** to perform our evaluation.\n",
+        "\n",
+        "For our purposes, we'll consider the last four observations to be the test set, and we can access this part of the `pandas` `DataFrame` with the `.tail(N)` method (here, `.tail(4)`).\n",
+        "\n",
+        "We can then calculate the MSE between each of the fits and the original data as follows:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "cj8petGxJctJ",
+        "outputId": "ad7a5e17-fe65-4209-c074-bff1983723f4"
+      },
+      "source": [
+        "test = can_openers.tail(4)\n",
+        "test.head()\n",
+        "\n",
+        "mse_1 = ((test['alpha_0.1'] - test['shipments'])**2).mean()\n",
+        "mse_5 = ((test['alpha_0.5'] - test['shipments'])**2).mean()\n",
+        "mse_9 = ((test['alpha_0.9'] - test['shipments'])**2).mean()\n",
+        "\n",
+        "print(mse_1, mse_5, mse_9)"
+      ],
+      "execution_count": 174,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "2228.2308984507677 3050.129555940628 3431.988106158692\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
     {
       "cell_type": "markdown",
       "metadata": {
@@ -648,7 +719,7 @@
         "\n",
         "can_openers.plot()"
       ],
-      "execution_count": 161,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",