|
6 | 6 | "name": "4a_averages_and_ses.ipynb",
|
7 | 7 | "provenance": [],
|
8 | 8 | "collapsed_sections": [],
|
9 |
| - "authorship_tag": "ABX9TyP+mgtJZoqPKv7k/JkZPGz4", |
| 9 | + "authorship_tag": "ABX9TyN0MCV9CSEvVqalgvPpkNxv", |
10 | 10 | "include_colab_link": true
|
11 | 11 | },
|
12 | 12 | "kernelspec": {
|
|
44 | 44 | "base_uri": "https://localhost:8080/"
|
45 | 45 | },
|
46 | 46 | "id": "qyrssun2kgSi",
|
47 |
| - "outputId": "f07f1e2e-9055-42aa-c770-474996ab25a2" |
| 47 | + "outputId": "15bf8f9f-666a-4cd4-ff0f-131f9e75b5dd" |
48 | 48 | },
|
49 | 49 | "source": [
|
50 | 50 | "import pandas as pd\n",
|
|
69 | 69 | "\n",
|
70 | 70 | "from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt"
|
71 | 71 | ],
|
72 |
| - "execution_count": 156, |
| 72 | + "execution_count": 162, |
73 | 73 | "outputs": [
|
74 | 74 | {
|
75 | 75 | "output_type": "stream",
|
76 | 76 | "text": [
|
77 | 77 | " % Total % Received % Xferd Average Speed Time Time Time Current\n",
|
78 | 78 | " Dload Upload Total Spent Left Speed\n",
|
79 |
| - "100 477 100 477 0 0 3785 0 --:--:-- --:--:-- --:--:-- 3785\n", |
| 79 | + "100 477 100 477 0 0 3755 0 --:--:-- --:--:-- --:--:-- 3726\n", |
80 | 80 | " % Total % Received % Xferd Average Speed Time Time Time Current\n",
|
81 | 81 | " Dload Upload Total Spent Left Speed\n",
|
82 |
| - "100 629 100 629 0 0 4952 0 --:--:-- --:--:-- --:--:-- 4992\n", |
| 82 | + "100 629 100 629 0 0 4250 0 --:--:-- --:--:-- --:--:-- 4278\n", |
83 | 83 | " % Total % Received % Xferd Average Speed Time Time Time Current\n",
|
84 | 84 | " Dload Upload Total Spent Left Speed\n",
|
85 |
| - "100 3867 100 3867 0 0 37182 0 --:--:-- --:--:-- --:--:-- 37182\n", |
| 85 | + "100 3867 100 3867 0 0 37911 0 --:--:-- --:--:-- --:--:-- 37911\n", |
86 | 86 | " % Total % Received % Xferd Average Speed Time Time Time Current\n",
|
87 | 87 | " Dload Upload Total Spent Left Speed\n",
|
88 |
| - "100 456 100 456 0 0 3931 0 --:--:-- --:--:-- --:--:-- 3931\n", |
| 88 | + "100 456 100 456 0 0 5010 0 --:--:-- --:--:-- --:--:-- 5010\n", |
89 | 89 | " % Total % Received % Xferd Average Speed Time Time Time Current\n",
|
90 | 90 | " Dload Upload Total Spent Left Speed\n",
|
91 |
| - "100 1456 100 1456 0 0 13866 0 --:--:-- --:--:-- --:--:-- 13866\n", |
| 91 | + "100 1456 100 1456 0 0 18200 0 --:--:-- --:--:-- --:--:-- 18200\n", |
92 | 92 | " % Total % Received % Xferd Average Speed Time Time Time Current\n",
|
93 | 93 | " Dload Upload Total Spent Left Speed\n",
|
94 |
| - "100 151 100 151 0 0 1659 0 --:--:-- --:--:-- --:--:-- 1659\n" |
| 94 | + "100 151 100 151 0 0 1556 0 --:--:-- --:--:-- --:--:-- 1556\n" |
95 | 95 | ],
|
96 | 96 | "name": "stdout"
|
97 | 97 | }
|
|
116 | 116 | "height": 541
|
117 | 117 | },
|
118 | 118 | "id": "mbtL6K0tIPOM",
|
119 |
| - "outputId": "899b4e3a-aaab-48ec-e33f-9a24abe8d1e2" |
| 119 | + "outputId": "877583d9-a0b9-4f39-ed54-2b965a792991" |
120 | 120 | },
|
121 | 121 | "source": [
|
122 | 122 | "can_openers.plot()"
|
123 | 123 | ],
|
124 |
| - "execution_count": 5, |
| 124 | + "execution_count": 163, |
125 | 125 | "outputs": [
|
126 | 126 | {
|
127 | 127 | "output_type": "execute_result",
|
128 | 128 | "data": {
|
129 | 129 | "text/plain": [
|
130 |
| - "<matplotlib.axes._subplots.AxesSubplot at 0x7f0bb96716a0>" |
| 130 | + "<matplotlib.axes._subplots.AxesSubplot at 0x7f0b9d3c1908>" |
131 | 131 | ]
|
132 | 132 | },
|
133 | 133 | "metadata": {
|
134 | 134 | "tags": []
|
135 | 135 | },
|
136 |
| - "execution_count": 5 |
| 136 | + "execution_count": 163 |
137 | 137 | },
|
138 | 138 | {
|
139 | 139 | "output_type": "display_data",
|
|
170 | 170 | "metadata": {
|
171 | 171 | "colab": {
|
172 | 172 | "base_uri": "https://localhost:8080/",
|
173 |
| - "height": 228 |
| 173 | + "height": 0 |
174 | 174 | },
|
175 | 175 | "id": "Wy5VyH48klYK",
|
176 |
| - "outputId": "dc7cea7d-c76f-40bd-f6d2-c2984cfa812b" |
| 176 | + "outputId": "6c9ac18a-7131-4aa1-cf8b-703d20a9cf30" |
177 | 177 | },
|
178 | 178 | "source": [
|
179 | 179 | "can_openers['mean_forecast'] = can_openers.expanding().mean().shift(1)\n",
|
180 | 180 | "can_openers.head()"
|
181 | 181 | ],
|
182 |
| - "execution_count": 6, |
| 182 | + "execution_count": 164, |
183 | 183 | "outputs": [
|
184 | 184 | {
|
185 | 185 | "output_type": "execute_result",
|
|
255 | 255 | "metadata": {
|
256 | 256 | "tags": []
|
257 | 257 | },
|
258 |
| - "execution_count": 6 |
| 258 | + "execution_count": 164 |
259 | 259 | }
|
260 | 260 | ]
|
261 | 261 | },
|
|
264 | 264 | "metadata": {
|
265 | 265 | "colab": {
|
266 | 266 | "base_uri": "https://localhost:8080/",
|
267 |
| - "height": 541 |
| 267 | + "height": 0 |
268 | 268 | },
|
269 | 269 | "id": "fsK3fqIIktfJ",
|
270 |
| - "outputId": "aba0cb1d-d729-4fd1-8360-41b7dc506d92" |
| 270 | + "outputId": "23f58cb3-4d64-4f08-b6e2-d73c29c09d7f" |
271 | 271 | },
|
272 | 272 | "source": [
|
273 | 273 | "can_openers['mean_forecast'].plot()\n",
|
|
279 | 279 | "can_openers['mean_value'].plot(color='red', ax=ax)\n",
|
280 | 280 | "plt.legend()"
|
281 | 281 | ],
|
282 |
| - "execution_count": 9, |
| 282 | + "execution_count": 165, |
283 | 283 | "outputs": [
|
284 | 284 | {
|
285 | 285 | "output_type": "execute_result",
|
286 | 286 | "data": {
|
287 | 287 | "text/plain": [
|
288 |
| - "<matplotlib.legend.Legend at 0x7f0bb937d940>" |
| 288 | + "<matplotlib.legend.Legend at 0x7f0b9ba1a2e8>" |
289 | 289 | ]
|
290 | 290 | },
|
291 | 291 | "metadata": {
|
292 | 292 | "tags": []
|
293 | 293 | },
|
294 |
| - "execution_count": 9 |
| 294 | + "execution_count": 165 |
295 | 295 | },
|
296 | 296 | {
|
297 | 297 | "output_type": "display_data",
|
|
339 | 339 | "metadata": {
|
340 | 340 | "colab": {
|
341 | 341 | "base_uri": "https://localhost:8080/",
|
342 |
| - "height": 378 |
| 342 | + "height": 0 |
343 | 343 | },
|
344 | 344 | "id": "xWRta9Dzk-Tg",
|
345 |
| - "outputId": "4fedca51-cbb5-4c9f-cf89-f13441457292" |
| 345 | + "outputId": "734d7aa2-f1c8-4c66-a3ab-656b77b82260" |
346 | 346 | },
|
347 | 347 | "source": [
|
348 | 348 | "can_openers = pd.read_csv('can_openers.csv', parse_dates=['month']).set_index('month')\n",
|
|
352 | 352 | "can_openers['MA5_forecast'] = can_openers['shipments'].rolling(5, center=True).mean().shift(3)\n",
|
353 | 353 | "can_openers.head(10)"
|
354 | 354 | ],
|
355 |
| - "execution_count": 44, |
| 355 | + "execution_count": 166, |
356 | 356 | "outputs": [
|
357 | 357 | {
|
358 | 358 | "output_type": "execute_result",
|
|
470 | 470 | "metadata": {
|
471 | 471 | "tags": []
|
472 | 472 | },
|
473 |
| - "execution_count": 44 |
| 473 | + "execution_count": 166 |
474 | 474 | }
|
475 | 475 | ]
|
476 | 476 | },
|
|
479 | 479 | "metadata": {
|
480 | 480 | "colab": {
|
481 | 481 | "base_uri": "https://localhost:8080/",
|
482 |
| - "height": 541 |
| 482 | + "height": 0 |
483 | 483 | },
|
484 | 484 | "id": "5Rnf3YpLLtt-",
|
485 |
| - "outputId": "edfe6411-1da6-4c26-9583-23a53d7a47b9" |
| 485 | + "outputId": "affb0ae3-c083-4978-bde7-9c85bae0dc4e" |
486 | 486 | },
|
487 | 487 | "source": [
|
488 | 488 | "can_openers['MA3_forecast'].plot()\n",
|
|
492 | 492 | "\n",
|
493 | 493 | "plt.legend()"
|
494 | 494 | ],
|
495 |
| - "execution_count": 45, |
| 495 | + "execution_count": 167, |
496 | 496 | "outputs": [
|
497 | 497 | {
|
498 | 498 | "output_type": "execute_result",
|
499 | 499 | "data": {
|
500 | 500 | "text/plain": [
|
501 |
| - "<matplotlib.legend.Legend at 0x7f0bb11f36a0>" |
| 501 | + "<matplotlib.legend.Legend at 0x7f0b9b9d2748>" |
502 | 502 | ]
|
503 | 503 | },
|
504 | 504 | "metadata": {
|
505 | 505 | "tags": []
|
506 | 506 | },
|
507 |
| - "execution_count": 45 |
| 507 | + "execution_count": 167 |
508 | 508 | },
|
509 | 509 | {
|
510 | 510 | "output_type": "display_data",
|
|
550 | 550 | "height": 699
|
551 | 551 | },
|
552 | 552 | "id": "ZdpzoBlwcO6s",
|
553 |
| - "outputId": "cd35cf75-d706-4665-e675-54f2619d18a0" |
| 553 | + "outputId": "f7b5cbce-5be3-402c-8e5b-eb752bf0dab0" |
554 | 554 | },
|
555 | 555 | "source": [
|
556 | 556 | "# Reload a fresh copy of our data\n",
|
|
568 | 568 | "\n",
|
569 | 569 | "can_openers.plot()"
|
570 | 570 | ],
|
571 |
| - "execution_count": 160, |
| 571 | + "execution_count": 168, |
572 | 572 | "outputs": [
|
573 | 573 | {
|
574 | 574 | "output_type": "stream",
|
|
588 | 588 | "output_type": "execute_result",
|
589 | 589 | "data": {
|
590 | 590 | "text/plain": [
|
591 |
| - "<matplotlib.axes._subplots.AxesSubplot at 0x7f0b9bc4e6d8>" |
| 591 | + "<matplotlib.axes._subplots.AxesSubplot at 0x7f0b9b90f780>" |
592 | 592 | ]
|
593 | 593 | },
|
594 | 594 | "metadata": {
|
595 | 595 | "tags": []
|
596 | 596 | },
|
597 |
| - "execution_count": 160 |
| 597 | + "execution_count": 168 |
598 | 598 | },
|
599 | 599 | {
|
600 | 600 | "output_type": "display_data",
|
|
611 | 611 | }
|
612 | 612 | ]
|
613 | 613 | },
|
| 614 | + { |
| 615 | + "cell_type": "code", |
| 616 | + "metadata": { |
| 617 | + "colab": { |
| 618 | + "base_uri": "https://localhost:8080/" |
| 619 | + }, |
| 620 | + "id": "Hh5S264iJFG5", |
| 621 | + "outputId": "6098d250-7255-4e1e-946e-b016b913fa74" |
| 622 | + }, |
| 623 | + "source": [ |
| 624 | + "can_openers.shape" |
| 625 | + ], |
| 626 | + "execution_count": 169, |
| 627 | + "outputs": [ |
| 628 | + { |
| 629 | + "output_type": "execute_result", |
| 630 | + "data": { |
| 631 | + "text/plain": [ |
| 632 | + "(12, 4)" |
| 633 | + ] |
| 634 | + }, |
| 635 | + "metadata": { |
| 636 | + "tags": [] |
| 637 | + }, |
| 638 | + "execution_count": 169 |
| 639 | + } |
| 640 | + ] |
| 641 | + }, |
| 642 | + { |
| 643 | + "cell_type": "markdown", |
| 644 | + "metadata": { |
| 645 | + "id": "NY8QCfKkIoIM" |
| 646 | + }, |
| 647 | + "source": [ |
| 648 | + "We can calculate the mean squared error (MSE) to measure how well each of the smoothed series fits the data. Remember that we'll want to use a **test set** to perform our evaluation.\n", |
| 649 | + "\n", |
| 650 | + "For our purposes, we'll consider the last four observations to be the test set, and we can access this part of the `pandas` `DataFrame` with the `.tail(N)` method (here, `.tail(4)`).\n", |
| 651 | + "\n", |
| 652 | + "We can then calculate the MSE between each of the fits and the original data as follows:" |
| 653 | + ] |
| 654 | + }, |
| 655 | + { |
| 656 | + "cell_type": "code", |
| 657 | + "metadata": { |
| 658 | + "colab": { |
| 659 | + "base_uri": "https://localhost:8080/" |
| 660 | + }, |
| 661 | + "id": "cj8petGxJctJ", |
| 662 | + "outputId": "ad7a5e17-fe65-4209-c074-bff1983723f4" |
| 663 | + }, |
| 664 | + "source": [ |
| 665 | + "# Hold out the last four rows as the test set, then score each alpha_* column by MSE against shipments.\n", |
| 666 | + "test = can_openers.tail(4)\n", |
| 667 | + "\n", |
| 668 | + "mse_1 = ((test['alpha_0.1'] - test['shipments'])**2).mean()\n", |
| 669 | + "mse_5 = ((test['alpha_0.5'] - test['shipments'])**2).mean()\n", |
| 670 | + "mse_9 = ((test['alpha_0.9'] - test['shipments'])**2).mean()\n", |
| 671 | + "\n", |
| 672 | + "print(mse_1, mse_5, mse_9)" |
| 673 | + ], |
| 674 | + "execution_count": 174, |
| 675 | + "outputs": [ |
| 676 | + { |
| 677 | + "output_type": "stream", |
| 678 | + "text": [ |
| 679 | + "2228.2308984507677 3050.129555940628 3431.988106158692\n" |
| 680 | + ], |
| 681 | + "name": "stdout" |
| 682 | + } |
| 683 | + ] |
| 684 | + }, |
614 | 685 | {
|
615 | 686 | "cell_type": "markdown",
|
616 | 687 | "metadata": {
|
|
648 | 719 | "\n",
|
649 | 720 | "can_openers.plot()"
|
650 | 721 | ],
|
651 |
| - "execution_count": 161, |
| 722 | + "execution_count": null, |
652 | 723 | "outputs": [
|
653 | 724 | {
|
654 | 725 | "output_type": "stream",
|
|
0 commit comments