Commit 9b183a8

Merge pull request scikit-learn#29 from glouppe/adaboost
FIX: some of Gael comments
2 parents: 6711dfc + 8ecc504

File tree

5 files changed: +22 −24 lines

doc/modules/ensemble.rst

Lines changed: 1 addition & 1 deletion
@@ -281,7 +281,7 @@ concentrate on the examples that are missed by the previous ones in the sequence
 AdaBoost can be used both for classification and regression problems:
 
 - For multi-class classification, :class:`AdaBoostClassifier` implements
-  AdaBoost-SAMME [ZZRH2009]_.
+  AdaBoost-SAMME and AdaBoost-SAMME.R [ZZRH2009]_.
 
 - For regression, :class:`AdaBoostRegressor` implements AdaBoost.R2 [D1997]_.
 
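The two estimators named in the documentation entry above can be exercised with a minimal sketch. This assumes a scikit-learn installation where `AdaBoostClassifier` and `make_gaussian_quantiles` are importable as shown; the `algorithm` choice (SAMME vs. SAMME.R) that this commit documents is left at the library default, since its spelling has varied across releases.

```python
from sklearn.datasets import make_gaussian_quantiles
from sklearn.ensemble import AdaBoostClassifier

# Three concentric-quantile classes, as in the multi-class AdaBoost example.
X, y = make_gaussian_quantiles(n_samples=300, n_features=2,
                               n_classes=3, random_state=0)

# Boost 50 decision stumps (the default base estimator).
clf = AdaBoostClassifier(n_estimators=50, random_state=0)
clf.fit(X, y)
print(clf.score(X, y))  # training accuracy, well above the 1/3 chance level
```

`AdaBoostRegressor` follows the same fit/score pattern for AdaBoost.R2 on regression targets.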

examples/ensemble/plot_adaboost_multiclass.py

Lines changed: 9 additions & 6 deletions
@@ -10,12 +10,15 @@
 spheres such that roughly equal numbers of samples are in each class (quantiles
 of the :math:`\chi^2` distribution).
 
-The performance of the SAMME and SAMME.R [1] algorithms are compared.
-The error of each algorithm on the test set after each boosting iteration is
-shown on the left, the classification error on the test set of each tree is
-shown in the middle, and the boost weight of each tree is shown on the right.
-All trees have a weight of one in the SAMME.R algorithm and therefore are not
-shown.
+The performance of the SAMME and SAMME.R [1] algorithms are compared. SAMME.R
+uses the probability estimates to update the additive model, while SAMME uses
+the classifications only. As the example illustrates, the SAMME.R algorithm
+typically converges faster than SAMME, achieving a lower test error with fewer
+boosting iterations. The error of each algorithm on the test set after each
+boosting iteration is shown on the left, the classification error on the test
+set of each tree is shown in the middle, and the boost weight of each tree is
+shown on the right. All trees have a weight of one in the SAMME.R algorithm and
+therefore are not shown.
 
 .. [1] J. Zhu, H. Zou, S. Rosset, T. Hastie, "Multi-class AdaBoost", 2009.
 
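The "test error after each boosting iteration" curve that this example plots can be sketched with `staged_predict`, which yields predictions of the partially built ensemble after each boosting step. This is a simplified sketch for a single ensemble, not the full two-algorithm comparison in the example script; it assumes `staged_predict` behaves as in current scikit-learn releases.

```python
import numpy as np
from sklearn.datasets import make_gaussian_quantiles
from sklearn.ensemble import AdaBoostClassifier

X, y = make_gaussian_quantiles(n_samples=1000, n_features=10,
                               n_classes=3, random_state=1)
X_train, X_test = X[:700], X[700:]
y_train, y_test = y[:700], y[700:]

clf = AdaBoostClassifier(n_estimators=30, random_state=0)
clf.fit(X_train, y_train)

# Test-set error of the ensemble truncated after each boosting iteration
# (the quantity shown in the left panel of the plot).
staged_errors = [np.mean(y_pred != y_test)
                 for y_pred in clf.staged_predict(X_test)]
```

Plotting `staged_errors` against the iteration index reproduces the shape of the left panel: the error of the growing additive model as trees are added.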

sklearn/datasets/samples_generator.py

Lines changed: 3 additions & 4 deletions
@@ -331,7 +331,7 @@ def make_hastie_10_2(n_samples=12000, random_state=None):
     The ten features are standard independent Gaussian and
     the target ``y`` is defined by::
 
-      y[i] = 1 if np.sum(X[i]**2) > 9.34 else -1
+      y[i] = 1 if np.sum(X[i] ** 2) > 9.34 else -1
 
     Parameters
     ----------
@@ -1291,9 +1291,8 @@ def make_gaussian_quantiles(mean=None, cov=1., n_samples=100,
     # Label by quantile
     step = n_samples // n_classes
 
-    y = np.hstack([
-        np.repeat(np.arange(n_classes), step),
-        np.repeat(n_classes - 1, n_samples - step * n_classes)])
+    y = np.hstack([np.repeat(np.arange(n_classes), step),
+                   np.repeat(n_classes - 1, n_samples - step * n_classes)])
 
     if shuffle:
         X, y = util_shuffle(X, y, random_state=generator)
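The reflowed labelling line in `make_gaussian_quantiles` can be checked in isolation. The sketch below uses toy values (`n_samples=10`, `n_classes=3`, chosen here for illustration): each class label is repeated `step` times, and the remainder left when `n_samples` is not divisible by `n_classes` is assigned to the last class.

```python
import numpy as np

n_samples, n_classes = 10, 3
step = n_samples // n_classes  # 3 samples per class

# Same expression as in the diff: n_classes blocks of `step` labels,
# then the leftover n_samples - step * n_classes samples get the last label.
y = np.hstack([np.repeat(np.arange(n_classes), step),
               np.repeat(n_classes - 1, n_samples - step * n_classes)])
print(y)  # [0 0 0 1 1 1 2 2 2 2]
```

When `n_samples` divides evenly, the second `np.repeat` contributes an empty array and every class gets exactly `step` samples.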

sklearn/ensemble/tests/test_weight_boosting.py

Lines changed: 2 additions & 4 deletions
@@ -9,6 +9,8 @@
 from nose.tools import assert_true
 from nose.tools import assert_raises
 
+from sklearn.dummy import DummyClassifier
+from sklearn.dummy import DummyRegressor
 from sklearn.grid_search import GridSearchCV
 from sklearn.ensemble import AdaBoostClassifier
 from sklearn.ensemble import AdaBoostRegressor
@@ -179,10 +181,6 @@ def test_importances():
 
 def test_error():
     """Test that it gives proper exception on deficient input."""
-    from sklearn.dummy import DummyClassifier
-    from sklearn.dummy import DummyRegressor
-
-    # Invalid values for parameters
     assert_raises(ValueError,
                   AdaBoostClassifier(learning_rate=-1).fit,
                   X, y)

sklearn/ensemble/weight_boosting.py

Lines changed: 7 additions & 9 deletions
@@ -438,15 +438,13 @@ def _boost_real(self, iboost, X, y, sample_weight):
         if estimator_error <= 0:
             return sample_weight, 1., 0.
 
-        """
-        Construct y coding as described in Zhu et al [2]::
-
-            y_k = 1 if c == k else -1 / (K - 1)
-
-        where K == n_classes_ and c, k in [0, K) are indices along the second
-        axis of the y coding with c being the index corresponding to the true
-        class label.
-        """
+        # Construct y coding as described in Zhu et al [2]:
+        #
+        #   y_k = 1 if c == k else -1 / (K - 1)
+        #
+        # where K == n_classes_ and c, k in [0, K) are indices along the second
+        # axis of the y coding with c being the index corresponding to the true
+        # class label.
         n_classes = self.n_classes_
         classes = self.classes_
         y_codes = np.array([-1. / (n_classes - 1), 1.])
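The y coding that the rewritten comment describes can be illustrated numerically. The values below (`K = 3` classes, labels `[0, 2, 1]`) are toy inputs chosen for this sketch, not taken from the commit; the indexing trick mirrors the `y_codes` line visible in the diff context.

```python
import numpy as np

n_classes = 3                      # K
classes = np.arange(n_classes)
y = np.array([0, 2, 1])            # true class label of each sample

# y_codes[0] = -1 / (K - 1) for "not the true class", y_codes[1] = 1 for it.
y_codes = np.array([-1. / (n_classes - 1), 1.])

# (classes == y[:, np.newaxis]) is True only in the true-class column;
# casting the mask to 0/1 and indexing y_codes yields the SAMME.R coding.
y_coding = y_codes[(classes == y[:, np.newaxis]).astype(int)]
print(y_coding)
```

Each row contains a single 1 in the true-class position and `-1 / (K - 1)` elsewhere, so every row sums to zero, which is what makes the coding symmetric across classes in the Zhu et al. derivation.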
