
Commit a715a9f

Merge branch 'dfsg' into debian
* dfsg:
  Corrected macro ROC in example plot_roc
  BF: FIX OvR decision_function_shape in SVC
  Fix fit_transform, stability issue and scale issue in PLS
  FIX class_weight in LogisticRegression and LogisticRegressionCV
  FIX MaxAbsScaler on sparse matrices with 1 row
  Deprecate residues_ in LinearRegression
  Addressed comments on PR scikit-learn#5451
  MAINT Removed deprecated stuff.
  MAINT disable circle ci on 0.17.X
  MAINT: deprecation warns from StandardScaler std_
  FIX: remove shuffling in LabelKFold
  FIX skip LDA deprecation test on python3.3 that has no reload.
  Fix broken examples using RandomTreeEmbeddings
  MAINT Use the full listing of the rackspace wheelhouse for appveyor
2 parents: 918005f + e87f203


41 files changed: +407, -454 lines

circle.yml

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+general:
+  # Restric the build to the branch master only
+  branches:
+    only:
+      - master

continuous_integration/appveyor/requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
 # Those wheels were collected from http://www.lfd.uci.edu/~gohlke/pythonlibs/
 # This is a temporary solution. As soon as numpy and scipy provide official
 # wheel for windows we ca delete this --find-links line.
---find-links http://28daf2247a33ed269873-7b1aad3fab3cc330e1fd9d109892382a.r6.cf2.rackcdn.com/index.html
+--find-links http://28daf2247a33ed269873-7b1aad3fab3cc330e1fd9d109892382a.r6.cf2.rackcdn.com/
 
 # fix the versions of numpy to force the use of numpy and scipy to use the whl
 # of the rackspace folder instead of trying to install from more recent

doc/whats_new.rst

Lines changed: 10 additions & 0 deletions
@@ -64,6 +64,10 @@ New features
   shuffling step in the ``cd`` solver.
   By `Tom Dupre la Tour`_ and `Mathieu Blondel`_.
 
+- **IndexError** bug `#5495
+  <https://github.com/scikit-learn/scikit-learn/issues/5495>`_ when
+  doing OVR(SVC(decision_function_shape="ovr")). Fixed by `Elvis Dohmatob`_.
+
 Enhancements
 ............
 - :class:`manifold.TSNE` now supports approximate optimization via the
@@ -280,6 +284,10 @@ Bug fixes
   <https://github.com/scikit-learn/scikit-learn/pull/4478>`_)
   By `Andreas Müller`_, `Loic Esteve`_ and `Giorgio Patrini`_.
 
+- Fixed bug in :class:`cross_decomposition.PLS` that yielded unstable and
+  platform dependent output, and failed on `fit_transform`.
+  By `Arthur Mensch`_.
+
 API changes summary
 -------------------
 - Attribute `data_min`, `data_max` and `data_range` in
@@ -3766,3 +3774,5 @@ David Huard, Dave Morrill, Ed Schofield, Travis Oliphant, Pearu Peterson.
 .. _Jean Kossaifi: https://github.com/JeanKossaifi
 .. _Andrew Lamb: https://github.com/andylamb
 .. _Graham Clenaghan: https://github.com/gclenaghan
+.. _Giorgio Patrini: https://github.com/giorgiop
+.. _Elvis Dohmatob: https://github.com/dohmatob
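
For context on the SVC entry, a minimal sketch of the call pattern that issue #5495 describes; the iris data and parameter choices below are illustrative assumptions, not part of the commit:

from sklearn import datasets
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC

iris = datasets.load_iris()
# Before this fix, wrapping an SVC whose own decision function is
# already one-vs-rest inside OneVsRestClassifier raised an IndexError.
clf = OneVsRestClassifier(SVC(decision_function_shape="ovr"))
clf.fit(iris.data, iris.target)
scores = clf.decision_function(iris.data)  # shape (n_samples, n_classes)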

examples/ensemble/plot_feature_transformation.py

Lines changed: 7 additions & 7 deletions
@@ -34,10 +34,10 @@
 from sklearn.linear_model import LogisticRegression
 from sklearn.ensemble import (RandomTreesEmbedding, RandomForestClassifier,
                               GradientBoostingClassifier)
-from sklearn.feature_selection import SelectFromModel
 from sklearn.preprocessing import OneHotEncoder
 from sklearn.cross_validation import train_test_split
 from sklearn.metrics import roc_curve
+from sklearn.pipeline import make_pipeline
 
 n_estimator = 10
 X, y = make_classification(n_samples=80000)
@@ -51,13 +51,13 @@
                                                             test_size=0.5)
 
 # Unsupervised transformation based on totally random trees
-rt = RandomTreesEmbedding(max_depth=3, n_estimators=n_estimator)
-rt_lm = LogisticRegression()
-rt.fit(X_train, y_train)
-rt_lm.fit(SelectFromModel(rt, prefit=True).transform(X_train_lr), y_train_lr)
+rt = RandomTreesEmbedding(max_depth=3, n_estimators=n_estimator,
+                          random_state=0)
 
-y_pred_rt = rt_lm.predict_proba(
-    SelectFromModel(rt, prefit=True).transform(X_test))[:, 1]
+rt_lm = LogisticRegression()
+pipeline = make_pipeline(rt, rt_lm)
+pipeline.fit(X_train, y_train)
+y_pred_rt = pipeline.predict_proba(X_test)[:, 1]
 fpr_rt_lm, tpr_rt_lm, _ = roc_curve(y_test, y_pred_rt)
 
 # Supervised transformation based on random forests
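
Condensed, the pattern the rewritten example relies on is the following minimal sketch (the dataset size is an illustrative assumption); make_pipeline chains the embedding's fit_transform with the classifier's fit, replacing the broken SelectFromModel round-trip:

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomTreesEmbedding
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

X, y = make_classification(n_samples=1000, random_state=0)
# The embedding one-hot encodes each sample by the leaves it reaches;
# the logistic regression is then fit on that sparse representation.
pipe = make_pipeline(
    RandomTreesEmbedding(n_estimators=10, max_depth=3, random_state=0),
    LogisticRegression())
pipe.fit(X, y)
proba = pipe.predict_proba(X)[:, 1]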

examples/ensemble/plot_random_forest_embedding.py

Lines changed: 1 addition & 4 deletions
@@ -30,17 +30,14 @@
 from sklearn.datasets import make_circles
 from sklearn.ensemble import RandomTreesEmbedding, ExtraTreesClassifier
 from sklearn.decomposition import TruncatedSVD
-from sklearn.feature_selection import SelectFromModel
 from sklearn.naive_bayes import BernoulliNB
 
 # make a synthetic dataset
 X, y = make_circles(factor=0.5, random_state=0, noise=0.05)
 
 # use RandomTreesEmbedding to transform data
 hasher = RandomTreesEmbedding(n_estimators=10, random_state=0, max_depth=3)
-hasher.fit(X)
-model = SelectFromModel(hasher, prefit=True)
-X_transformed = model.transform(X)
+X_transformed = hasher.fit_transform(X)
 
 # Visualize result using PCA
 pca = TruncatedSVD(n_components=2)
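
As a sketch of what that single fit_transform call returns (sizes here follow the example's own defaults; the print line is illustrative):

from sklearn.datasets import make_circles
from sklearn.ensemble import RandomTreesEmbedding

X, y = make_circles(factor=0.5, random_state=0, noise=0.05)
hasher = RandomTreesEmbedding(n_estimators=10, random_state=0, max_depth=3)
# fit_transform returns a sparse binary matrix with one column per tree
# leaf; each row has exactly n_estimators non-zero entries.
X_transformed = hasher.fit_transform(X)
print(X_transformed.shape)  # (100, total number of leaves across trees)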

examples/model_selection/plot_roc.py

Lines changed: 16 additions & 2 deletions
@@ -44,6 +44,7 @@
 from sklearn.cross_validation import train_test_split
 from sklearn.preprocessing import label_binarize
 from sklearn.multiclass import OneVsRestClassifier
+from scipy import interp
 
 # Import some data to play with
 iris = datasets.load_iris()
@@ -99,10 +100,23 @@
 # Plot ROC curves for the multiclass problem
 
 # Compute macro-average ROC curve and ROC area
-fpr["macro"] = np.mean([fpr[i] for i in range(n_classes)], axis=0)
-tpr["macro"] = np.mean([tpr[i] for i in range(n_classes)], axis=0)
+
+# First aggregate all false positive rates
+all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
+
+# Then interpolate all ROC curves at this points
+mean_tpr = np.zeros_like(all_fpr)
+for i in range(n_classes):
+    mean_tpr += interp(all_fpr, fpr[i], tpr[i])
+
+# Finally average it and compute AUC
+mean_tpr /= n_classes
+
+fpr["macro"] = all_fpr
+tpr["macro"] = mean_tpr
 roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])
 
+# Plot all ROC curves
 plt.figure()
 plt.plot(fpr["micro"], tpr["micro"],
          label='micro-average ROC curve (area = {0:0.2f})'
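
The old macro average took np.mean over the per-class fpr/tpr arrays, which is only meaningful when every class's curve is sampled at the same points; in general the arrays have different lengths and grids. The fix interpolates each curve onto the union of all fpr values first. A standalone toy sketch of that step (the two curves are made up; scipy's interp is an alias of np.interp):

import numpy as np

fpr = {0: np.array([0.0, 0.5, 1.0]), 1: np.array([0.0, 0.25, 1.0])}
tpr = {0: np.array([0.0, 0.8, 1.0]), 1: np.array([0.0, 0.6, 1.0])}

# Union of all fpr grid points, then evaluate every curve on it.
all_fpr = np.unique(np.concatenate([fpr[i] for i in range(2)]))
mean_tpr = np.zeros_like(all_fpr)
for i in range(2):
    mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
mean_tpr /= 2  # macro-average TPR at each common fpr point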

sklearn/cluster/_feature_agglomeration.py

Lines changed: 2 additions & 16 deletions
@@ -11,9 +11,6 @@
 from ..utils import check_array
 from ..utils.validation import check_is_fitted
 
-import warnings
-
-
 ###############################################################################
 # Mixin class for feature agglomeration.
 
@@ -24,7 +21,7 @@ class AgglomerationTransform(TransformerMixin):
 
     pooling_func = np.mean
 
-    def transform(self, X, pooling_func=None):
+    def transform(self, X):
         """
         Transform a new matrix using the built clustering
 
@@ -34,25 +31,14 @@ def transform(self, X, pooling_func=None):
             A M by N array of M observations in N dimensions or a length
             M array of M one-dimensional observations.
 
-        pooling_func : callable, default=np.mean
-            This combines the values of agglomerated features into a single
-            value, and should accept an array of shape [M, N] and the keyword
-            argument `axis=1`, and reduce it to an array of size [M].
-
         Returns
         -------
         Y : array, shape = [n_samples, n_clusters] or [n_clusters]
             The pooled values for each feature cluster.
         """
         check_is_fitted(self, "labels_")
 
-        if pooling_func is not None:
-            warnings.warn("The pooling_func parameter is deprecated since 0.15 "
-                          "and will be removed in 0.18. "
-                          "Pass it to the constructor instead.",
-                          DeprecationWarning)
-        else:
-            pooling_func = self.pooling_func
+        pooling_func = self.pooling_func
         X = check_array(X)
         nX = []
         if len(self.labels_) != X.shape[1]:
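
With the deprecated transform(X, pooling_func=...) path removed, the pooling function is set on the estimator itself. A minimal sketch of the surviving API (the random data and np.median choice are illustrative assumptions):

import numpy as np
from sklearn.cluster import FeatureAgglomeration

X = np.random.RandomState(0).rand(20, 8)
# pooling_func now goes to the constructor, not to transform().
agglo = FeatureAgglomeration(n_clusters=3, pooling_func=np.median)
X_reduced = agglo.fit_transform(X)  # shape (20, 3)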

sklearn/cluster/dbscan_.py

Lines changed: 2 additions & 18 deletions
@@ -9,8 +9,6 @@
 #
 # License: BSD 3 clause
 
-import warnings
-
 import numpy as np
 from scipy import sparse
 
@@ -24,8 +22,7 @@
 
 
 def dbscan(X, eps=0.5, min_samples=5, metric='minkowski',
-           algorithm='auto', leaf_size=30, p=2, sample_weight=None,
-           random_state=None):
+           algorithm='auto', leaf_size=30, p=2, sample_weight=None):
     """Perform DBSCAN clustering from vector array or distance matrix.
 
     Read more in the :ref:`User Guide <dbscan>`.
@@ -75,10 +72,6 @@ def dbscan(X, eps=0.5, min_samples=5, metric='minkowski',
         weight may inhibit its eps-neighbor from being core.
         Note that weights are absolute, and default to 1.
 
-    random_state: numpy.RandomState, optional
-        Deprecated and ignored as of version 0.16, will be removed in version
-        0.18. DBSCAN does not use random initialization.
-
     Returns
     -------
     core_samples : array [n_core_samples]
@@ -109,11 +102,6 @@ def dbscan(X, eps=0.5, min_samples=5, metric='minkowski',
     """
     if not eps > 0.0:
         raise ValueError("eps must be positive.")
-    if random_state is not None:
-        warnings.warn("The parameter random_state is deprecated in 0.16 "
-                      "and will be removed in version 0.18. "
-                      "DBSCAN is deterministic except for rare border cases.",
-                      category=DeprecationWarning)
 
     X = check_array(X, accept_sparse='csr')
     if sample_weight is not None:
@@ -195,9 +183,6 @@ class DBSCAN(BaseEstimator, ClusterMixin):
         of the construction and query, as well as the memory required
         to store the tree. The optimal value depends
        on the nature of the problem.
-    random_state: numpy.RandomState, optional
-        Deprecated and ignored as of version 0.16, will be removed in version
-        0.18. DBSCAN does not use random initialization.
 
     Attributes
     ----------
@@ -233,14 +218,13 @@ class DBSCAN(BaseEstimator, ClusterMixin):
     """
 
     def __init__(self, eps=0.5, min_samples=5, metric='euclidean',
-                 algorithm='auto', leaf_size=30, p=None, random_state=None):
+                 algorithm='auto', leaf_size=30, p=None):
         self.eps = eps
         self.min_samples = min_samples
         self.metric = metric
         self.algorithm = algorithm
         self.leaf_size = leaf_size
         self.p = p
-        self.random_state = random_state
 
     def fit(self, X, y=None, sample_weight=None):
         """Perform DBSCAN clustering from features or distance matrix.

sklearn/cluster/hierarchical.py

Lines changed: 3 additions & 41 deletions
@@ -86,8 +86,7 @@ def _fix_connectivity(X, connectivity, n_components=None,
 ###############################################################################
 # Hierarchical tree building functions
 
-def ward_tree(X, connectivity=None, n_components=None, n_clusters=None,
-              return_distance=False):
+def ward_tree(X, connectivity=None, n_clusters=None, return_distance=False):
     """Ward clustering based on a Feature matrix.
 
     Recursively merges the pair of clusters that minimally increases
@@ -111,12 +110,6 @@ def ward_tree(X, connectivity=None, n_components=None, n_clusters=None,
         be symmetric and only the upper triangular half is used.
         Default is None, i.e, the Ward algorithm is unstructured.
 
-    n_components : int (optional)
-        Number of connected components. If None the number of connected
-        components is estimated from the connectivity matrix.
-        NOTE: This parameter is now directly determined directly
-        from the connectivity matrix and will be removed in 0.18
-
     n_clusters : int (optional)
         Stop early the construction of the tree at n_clusters. This is
         useful to decrease computation time if the number of clusters is
@@ -199,11 +192,6 @@ def ward_tree(X, connectivity=None, n_components=None, n_clusters=None,
     else:
         return children_, 1, n_samples, None
 
-    if n_components is not None:
-        warnings.warn(
-            "n_components is now directly calculated from the connectivity "
-            "matrix and will be removed in 0.18",
-            DeprecationWarning)
     connectivity, n_components = _fix_connectivity(X, connectivity)
     if n_clusters is None:
         n_nodes = 2 * n_samples - 1
@@ -326,12 +314,6 @@ def linkage_tree(X, connectivity=None, n_components=None,
         be symmetric and only the upper triangular half is used.
         Default is None, i.e, the Ward algorithm is unstructured.
 
-    n_components : int (optional)
-        Number of connected components. If None the number of connected
-        components is estimated from the connectivity matrix.
-        NOTE: This parameter is now directly determined directly
-        from the connectivity matrix and will be removed in 0.18
-
     n_clusters : int (optional)
         Stop early the construction of the tree at n_clusters. This is
         useful to decrease computation time if the number of clusters is
@@ -435,11 +417,6 @@ def linkage_tree(X, connectivity=None, n_components=None,
             return children_, 1, n_samples, None, distances
         return children_, 1, n_samples, None
 
-    if n_components is not None:
-        warnings.warn(
-            "n_components is now directly calculated from the connectivity "
-            "matrix and will be removed in 0.18",
-            DeprecationWarning)
     connectivity, n_components = _fix_connectivity(X, connectivity)
 
     connectivity = connectivity.tocoo()
@@ -636,12 +613,6 @@ class AgglomerativeClustering(BaseEstimator, ClusterMixin):
         By default, no caching is done. If a string is given, it is the
         path to the caching directory.
 
-    n_components : int (optional)
-        Number of connected components. If None the number of connected
-        components is estimated from the connectivity matrix.
-        NOTE: This parameter is now directly determined from the connectivity
-        matrix and will be removed in 0.18
-
     compute_full_tree : bool or 'auto' (optional)
         Stop early the construction of the tree at n_clusters. This is
         useful to decrease computation time if the number of clusters is
@@ -689,12 +660,10 @@ class AgglomerativeClustering(BaseEstimator, ClusterMixin):
 
     def __init__(self, n_clusters=2, affinity="euclidean",
                  memory=Memory(cachedir=None, verbose=0),
-                 connectivity=None, n_components=None,
-                 compute_full_tree='auto', linkage='ward',
-                 pooling_func=np.mean):
+                 connectivity=None, compute_full_tree='auto',
+                 linkage='ward', pooling_func=np.mean):
         self.n_clusters = n_clusters
         self.memory = memory
-        self.n_components = n_components
         self.connectivity = connectivity
         self.compute_full_tree = compute_full_tree
         self.linkage = linkage
@@ -760,7 +729,6 @@ def fit(self, X, y=None):
             kwargs['affinity'] = self.affinity
         self.children_, self.n_components_, self.n_leaves_, parents = \
             memory.cache(tree_builder)(X, connectivity,
-                                       n_components=self.n_components,
                                        n_clusters=n_clusters,
                                        **kwargs)
         # Cut the tree
@@ -807,12 +775,6 @@ class FeatureAgglomeration(AgglomerativeClustering, AgglomerationTransform):
         By default, no caching is done. If a string is given, it is the
         path to the caching directory.
 
-    n_components : int (optional)
-        Number of connected components. If None the number of connected
-        components is estimated from the connectivity matrix.
-        NOTE: This parameter is now directly determined from the connectivity
-        matrix and will be removed in 0.18
-
     compute_full_tree : bool or 'auto', optional, default "auto"
         Stop early the construction of the tree at n_clusters. This is
         useful to decrease computation time if the number of clusters is
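
Correspondingly, callers no longer pass n_components anywhere; it is derived from the connectivity matrix when one is given. A minimal sketch of the post-removal constructor (the random data is an illustrative assumption):

import numpy as np
from sklearn.cluster import AgglomerativeClustering

X = np.random.RandomState(0).rand(30, 4)
# n_components is gone from the constructor; with connectivity=None
# the tree is built unstructured, exactly as before.
model = AgglomerativeClustering(n_clusters=3, linkage='ward')
labels = model.fit(X).labels_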

sklearn/cluster/mean_shift_.py

Lines changed: 1 addition & 7 deletions
@@ -93,7 +93,7 @@ def _mean_shift_single_seed(my_mean, X, nbrs, max_iter):
 
 def mean_shift(X, bandwidth=None, seeds=None, bin_seeding=False,
                min_bin_freq=1, cluster_all=True, max_iter=300,
-               max_iterations=None, n_jobs=1):
+               n_jobs=1):
     """Perform mean shift clustering of data using a flat kernel.
 
     Read more in the :ref:`User Guide <mean_shift>`.
@@ -161,12 +161,6 @@ def mean_shift(X, bandwidth=None, seeds=None, bin_seeding=False,
     See examples/cluster/plot_meanshift.py for an example.
 
     """
-    # FIXME To be removed in 0.18
-    if max_iterations is not None:
-        warnings.warn("The `max_iterations` parameter has been renamed to "
-                      "`max_iter` from version 0.16. The `max_iterations` "
-                      "parameter will be removed in 0.18", DeprecationWarning)
-        max_iter = max_iterations
 
     if bandwidth is None:
         bandwidth = estimate_bandwidth(X)
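
Callers of the function form now use max_iter only; the renamed max_iterations alias is gone. A minimal sketch (bandwidth value and data are illustrative assumptions):

import numpy as np
from sklearn.cluster import mean_shift

X = np.random.RandomState(0).rand(50, 2)
# max_iter is the only supported name for the iteration cap now.
centers, labels = mean_shift(X, bandwidth=0.4, max_iter=300)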
