|
4 | 4 | "cell_type": "markdown",
|
5 | 5 | "metadata": {},
|
6 | 6 | "source": [
|
7 |
| - "Example of Regression Analysis Using the Boston Housing Data Set\n", |
8 |
| - "http://facweb.cs.depaul.edu/mobasher/classes/CSC478/Data/housing-dscr.txt" |
| 7 | + "Example of Regression Analysis Using the Boston Housing Data Set: http://facweb.cs.depaul.edu/mobasher/classes/CSC478/Data/housing-dscr.txt\n", |
| 8 | + "\n", |
| 9 | + "Source: http://facweb.cs.depaul.edu/mobasher/classes/CSC478/Notes/IPython%20Notebook%20-%20Regression.html" |
9 | 10 | ]
|
10 | 11 | },
|
11 | 12 | {
|
12 | 13 | "cell_type": "code",
|
13 | 14 | "execution_count": 1,
|
14 |
| - "metadata": { |
15 |
| - "collapsed": false |
16 |
| - }, |
| 15 | + "metadata": {}, |
17 | 16 | "outputs": [
|
18 | 17 | {
|
19 | 18 | "name": "stderr",
|
|
47 | 46 | {
|
48 | 47 | "cell_type": "code",
|
49 | 48 | "execution_count": 3,
|
50 |
| - "metadata": { |
51 |
| - "collapsed": false |
52 |
| - }, |
| 49 | + "metadata": {}, |
53 | 50 | "outputs": [
|
54 | 51 | {
|
55 | 52 | "data": {
|
|
142 | 139 | {
|
143 | 140 | "cell_type": "code",
|
144 | 141 | "execution_count": 4,
|
145 |
| - "metadata": { |
146 |
| - "collapsed": false |
147 |
| - }, |
| 142 | + "metadata": {}, |
148 | 143 | "outputs": [
|
149 | 144 | {
|
150 | 145 | "data": {
|
|
164 | 159 | {
|
165 | 160 | "cell_type": "code",
|
166 | 161 | "execution_count": 5,
|
167 |
| - "metadata": { |
168 |
| - "collapsed": false |
169 |
| - }, |
| 162 | + "metadata": {}, |
170 | 163 | "outputs": [
|
171 | 164 | {
|
172 | 165 | "data": {
|
|
186 | 179 | {
|
187 | 180 | "cell_type": "code",
|
188 | 181 | "execution_count": 6,
|
189 |
| - "metadata": { |
190 |
| - "collapsed": false |
191 |
| - }, |
| 182 | + "metadata": {}, |
192 | 183 | "outputs": [
|
193 | 184 | {
|
194 | 185 | "data": {
|
|
241 | 232 | {
|
242 | 233 | "cell_type": "code",
|
243 | 234 | "execution_count": 10,
|
244 |
| - "metadata": { |
245 |
| - "collapsed": false |
246 |
| - }, |
| 235 | + "metadata": {}, |
247 | 236 | "outputs": [
|
248 | 237 | {
|
249 | 238 | "data": {
|
|
284 | 273 | {
|
285 | 274 | "cell_type": "code",
|
286 | 275 | "execution_count": 15,
|
287 |
| - "metadata": { |
288 |
| - "collapsed": false |
289 |
| - }, |
| 276 | + "metadata": {}, |
290 | 277 | "outputs": [
|
291 | 278 | {
|
292 | 279 | "data": {
|
|
326 | 313 | {
|
327 | 314 | "cell_type": "code",
|
328 | 315 | "execution_count": 17,
|
329 |
| - "metadata": { |
330 |
| - "collapsed": false |
331 |
| - }, |
| 316 | + "metadata": {}, |
332 | 317 | "outputs": [
|
333 | 318 | {
|
334 | 319 | "data": {
|
|
356 | 341 | {
|
357 | 342 | "cell_type": "code",
|
358 | 343 | "execution_count": 18,
|
359 |
| - "metadata": { |
360 |
| - "collapsed": false |
361 |
| - }, |
| 344 | + "metadata": {}, |
362 | 345 | "outputs": [
|
363 | 346 | {
|
364 | 347 | "data": {
|
|
379 | 362 | {
|
380 | 363 | "cell_type": "code",
|
381 | 364 | "execution_count": 19,
|
382 |
| - "metadata": { |
383 |
| - "collapsed": false |
384 |
| - }, |
| 365 | + "metadata": {}, |
385 | 366 | "outputs": [
|
386 | 367 | {
|
387 | 368 | "data": {
|
|
402 | 383 | {
|
403 | 384 | "cell_type": "code",
|
404 | 385 | "execution_count": 21,
|
405 |
| - "metadata": { |
406 |
| - "collapsed": false |
407 |
| - }, |
| 386 | + "metadata": {}, |
408 | 387 | "outputs": [
|
409 | 388 | {
|
410 | 389 | "data": {
|
|
425 | 404 | {
|
426 | 405 | "cell_type": "code",
|
427 | 406 | "execution_count": 23,
|
428 |
| - "metadata": { |
429 |
| - "collapsed": false |
430 |
| - }, |
| 407 | + "metadata": {}, |
431 | 408 | "outputs": [
|
432 | 409 | {
|
433 | 410 | "data": {
|
|
454 | 431 | {
|
455 | 432 | "cell_type": "code",
|
456 | 433 | "execution_count": 24,
|
457 |
| - "metadata": { |
458 |
| - "collapsed": false |
459 |
| - }, |
| 434 | + "metadata": {}, |
460 | 435 | "outputs": [
|
461 | 436 | {
|
462 | 437 | "data": {
|
|
480 | 455 | {
|
481 | 456 | "cell_type": "code",
|
482 | 457 | "execution_count": 26,
|
483 |
| - "metadata": { |
484 |
| - "collapsed": false |
485 |
| - }, |
| 458 | + "metadata": {}, |
486 | 459 | "outputs": [
|
487 | 460 | {
|
488 | 461 | "name": "stdout",
|
|
501 | 474 | {
|
502 | 475 | "cell_type": "code",
|
503 | 476 | "execution_count": 27,
|
504 |
| - "metadata": { |
505 |
| - "collapsed": false |
506 |
| - }, |
| 477 | + "metadata": {}, |
507 | 478 | "outputs": [
|
508 | 479 | {
|
509 | 480 | "data": {
|
|
529 | 500 | {
|
530 | 501 | "cell_type": "code",
|
531 | 502 | "execution_count": 34,
|
532 |
| - "metadata": { |
533 |
| - "collapsed": false |
534 |
| - }, |
| 503 | + "metadata": {}, |
535 | 504 | "outputs": [
|
536 | 505 | {
|
537 | 506 | "data": {
|
|
562 | 531 | {
|
563 | 532 | "cell_type": "code",
|
564 | 533 | "execution_count": 35,
|
565 |
| - "metadata": { |
566 |
| - "collapsed": false |
567 |
| - }, |
| 534 | + "metadata": {}, |
568 | 535 | "outputs": [
|
569 | 536 | {
|
570 | 537 | "name": "stdout",
|
|
605 | 572 | {
|
606 | 573 | "cell_type": "code",
|
607 | 574 | "execution_count": 32,
|
608 |
| - "metadata": { |
609 |
| - "collapsed": false |
610 |
| - }, |
| 575 | + "metadata": {}, |
611 | 576 | "outputs": [
|
612 | 577 | {
|
613 | 578 | "data": {
|
|
629 | 594 | {
|
630 | 595 | "cell_type": "code",
|
631 | 596 | "execution_count": 33,
|
632 |
| - "metadata": { |
633 |
| - "collapsed": false |
634 |
| - }, |
| 597 | + "metadata": {}, |
635 | 598 | "outputs": [
|
636 | 599 | {
|
637 | 600 | "data": {
|
|
657 | 620 | {
|
658 | 621 | "cell_type": "code",
|
659 | 622 | "execution_count": 37,
|
660 |
| - "metadata": { |
661 |
| - "collapsed": false |
662 |
| - }, |
| 623 | + "metadata": {}, |
663 | 624 | "outputs": [
|
664 | 625 | {
|
665 | 626 | "data": {
|
|
688 | 649 | {
|
689 | 650 | "cell_type": "code",
|
690 | 651 | "execution_count": 38,
|
691 |
| - "metadata": { |
692 |
| - "collapsed": false |
693 |
| - }, |
| 652 | + "metadata": {}, |
694 | 653 | "outputs": [
|
695 | 654 | {
|
696 | 655 | "name": "stdout",
|
|
719 | 678 | {
|
720 | 679 | "cell_type": "code",
|
721 | 680 | "execution_count": 39,
|
722 |
| - "metadata": { |
723 |
| - "collapsed": false |
724 |
| - }, |
| 681 | + "metadata": {}, |
725 | 682 | "outputs": [
|
726 | 683 | {
|
727 | 684 | "name": "stdout",
|
|
818 | 775 | {
|
819 | 776 | "cell_type": "code",
|
820 | 777 | "execution_count": 40,
|
821 |
| - "metadata": { |
822 |
| - "collapsed": false |
823 |
| - }, |
| 778 | + "metadata": {}, |
824 | 779 | "outputs": [
|
825 | 780 | {
|
826 | 781 | "data": {
|
|
842 | 797 | "pl.show()"
|
843 | 798 | ]
|
844 | 799 | },
|
| 800 | + { |
| 801 | + "cell_type": "markdown", |
| 802 | + "metadata": {}, |
| 803 | + "source": [ |
| 804 | + "## Comparison of regression methods\n", |
| 805 | + "#### Let's parametrize the regression methods\n" |
| 806 | + ] |
| 807 | + }, |
| 808 | + { |
| 809 | + "cell_type": "code", |
| 810 | + "execution_count": 44, |
| 811 | + "metadata": {}, |
| 812 | + "outputs": [ |
| 813 | + { |
| 814 | + "name": "stdout", |
| 815 | + "output_type": "stream", |
| 816 | + "text": [ |
| 817 | + "Method: linear regression\n", |
| 818 | + "RMSE on training: 4.6795\n", |
| 819 | + "RMSE on 10-fold CV: 5.8819\n", |
| 820 | + "\n", |
| 821 | + "\n", |
| 822 | + "Method: lasso\n", |
| 823 | + "RMSE on training: 4.8570\n", |
| 824 | + "RMSE on 10-fold CV: 5.7675\n", |
| 825 | + "\n", |
| 826 | + "\n", |
| 827 | + "Method: ridge\n", |
| 828 | + "RMSE on training: 4.6822\n", |
| 829 | + "RMSE on 10-fold CV: 5.8535\n", |
| 830 | + "\n", |
| 831 | + "\n", |
| 832 | + "Method: elastic-net\n", |
| 833 | + "RMSE on training: 4.9072\n", |
| 834 | + "RMSE on 10-fold CV: 5.4936\n", |
| 835 | + "\n", |
| 836 | + "\n" |
| 837 | + ] |
| 838 | + } |
| 839 | + ], |
| 840 | + "source": [ |
| 841 | + "a = 0.3\n", |
| 842 | + "for name,met in [\n", |
| 843 | + " ('linear regression', LinearRegression()),\n", |
| 844 | + " ('lasso', Lasso(fit_intercept=True, alpha=a)),\n", |
| 845 | + " ('ridge', Ridge(fit_intercept=True, alpha=a)),\n", |
| 846 | + " ('elastic-net', ElasticNet(fit_intercept=True, alpha=a))\n", |
| 847 | + " ]:\n", |
| 848 | + " met.fit(x,y)\n", |
| 849 | + " # p = np.array([met.predict(xi) for xi in x])\n", |
| 850 | + " p = met.predict(x)\n", |
| 851 | + " e = p-y\n", |
| 852 | + " total_error = np.dot(e,e)\n", |
| 853 | + " rmse_train = np.sqrt(total_error/len(p))\n", |
| 854 | + "\n", |
| 855 | + " kf = KFold(len(x), n_folds=10)\n", |
| 856 | + " err = 0\n", |
| 857 | + " for train,test in kf:\n", |
| 858 | + " met.fit(x[train],y[train])\n", |
| 859 | + " p = met.predict(x[test])\n", |
| 860 | + " e = p-y[test]\n", |
| 861 | + " err += np.dot(e,e)\n", |
| 862 | + "\n", |
| 863 | + " rmse_10cv = np.sqrt(err/len(x))\n", |
| 864 | + " print('Method: %s' %name)\n", |
| 865 | + " print('RMSE on training: %.4f' %rmse_train)\n", |
| 866 | + " print('RMSE on 10-fold CV: %.4f' %rmse_10cv)\n", |
| 867 | + " print('\\n')" |
| 868 | + ] |
| 869 | + }, |
845 | 870 | {
|
846 | 871 | "cell_type": "code",
|
847 | 872 | "execution_count": null,
|
|
868 | 893 | "name": "python",
|
869 | 894 | "nbconvert_exporter": "python",
|
870 | 895 | "pygments_lexer": "ipython3",
|
871 |
| - "version": "3.6.0" |
| 896 | + "version": "3.5.3" |
872 | 897 | }
|
873 | 898 | },
|
874 | 899 | "nbformat": 4,
|
|
0 commit comments