@@ -227,67 +227,6 @@ alpha parameter, the fewer features selected.
 Processing Magazine [120] July 2007
 http://dsp.rice.edu/sites/dsp.rice.edu/files/cs/baraniukCSlecture07.pdf
 
-.. _randomized_l1:
-
-Randomized sparse models
--------------------------
-
-.. currentmodule:: sklearn.linear_model
-
-In terms of feature selection, there are some well-known limitations of
-L1-penalized models for regression and classification. For example, it is
-known that the Lasso will tend to select an individual variable out of a group
-of highly correlated features. Furthermore, even when the correlation between
-features is not too high, the conditions under which L1-penalized methods
-consistently select "good" features can be restrictive in general.
-
-To mitigate this problem, it is possible to use randomization techniques such
-as those presented in [B2009]_ and [M2010]_. The latter technique, known as
-stability selection, is implemented in the module :mod:`sklearn.linear_model`.
-In the stability selection method, a subsample of the data is fit to an
-L1-penalized model where the penalty of a random subset of coefficients has
-been scaled. Specifically, given a subsample of the data
-:math:`(x_i, y_i), i \in I`, where :math:`I \subset \{1, 2, \ldots, n\}` is a
-random subset of the data of size :math:`n_I`, the following modified Lasso
-fit is obtained:
-
-.. math:: \hat{w}_I = \mathrm{arg}\min_{w} \frac{1}{2n_I} \sum_{i \in I} (y_i - x_i^T w)^2 + \alpha \sum_{j=1}^p \frac{\vert w_j \vert}{s_j},
-
-where :math:`s_j \in \{s, 1\}` are independent trials of a fair Bernoulli
-random variable, and :math:`0 < s < 1` is the scaling factor. By repeating this
-procedure across different random subsamples and Bernoulli trials, one can
-count the fraction of times the randomized procedure selected each feature,
-and use these fractions as scores for feature selection.
-
-:class:`RandomizedLasso` implements this strategy for regression
-settings, using the Lasso, while :class:`RandomizedLogisticRegression` uses
-logistic regression and is suitable for classification tasks. To get a full
-path of stability scores you can use :func:`lasso_stability_path`.
-
-.. figure:: ../auto_examples/linear_model/images/sphx_glr_plot_sparse_recovery_003.png
-   :target: ../auto_examples/linear_model/plot_sparse_recovery.html
-   :align: center
-   :scale: 60
-
-Note that for randomized sparse models to be more powerful than standard
-F statistics at detecting non-zero features, the ground truth model
-should be sparse; in other words, only a small fraction of the features
-should be non-zero.
-
-.. topic:: Examples:
-
-   * :ref:`sphx_glr_auto_examples_linear_model_plot_sparse_recovery.py`: An example
-     comparing different feature selection approaches and discussing in
-     which situation each approach is to be favored.
-
-.. topic:: References:
-
-   .. [B2009] F. Bach, "Model-Consistent Sparse Estimation through the
-      Bootstrap." https://hal.inria.fr/hal-00354771/
-
-   .. [M2010] N. Meinshausen, P. Buhlmann, "Stability selection",
-      Journal of the Royal Statistical Society, 72 (2010)
-      http://arxiv.org/pdf/0809.2932.pdf
 
 Tree-based feature selection
 ----------------------------
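
The stability-selection procedure described in the removed section can still be sketched with estimators that remain in scikit-learn: dividing the penalty on coefficient ``w_j`` by ``s_j`` is equivalent to multiplying column ``j`` of ``X`` by ``s_j`` before fitting an ordinary ``Lasso``. The snippet below is a minimal illustration of that idea, not the removed ``RandomizedLasso`` implementation; the helper name ``stability_scores`` and its parameters (``scaling``, ``sample_fraction``, ``n_resampling``) are hypothetical choices made here for readability::

    import numpy as np
    from sklearn.linear_model import Lasso

    def stability_scores(X, y, alpha=0.1, scaling=0.5, sample_fraction=0.75,
                         n_resampling=200, random_state=0):
        # Hypothetical helper: fraction of resamplings in which each feature
        # receives a non-zero coefficient under the randomized-penalty Lasso.
        rng = np.random.RandomState(random_state)
        n_samples, n_features = X.shape
        n_subsample = int(sample_fraction * n_samples)
        selected = np.zeros(n_features)
        for _ in range(n_resampling):
            # draw the random subsample I of size n_I
            idx = rng.choice(n_samples, size=n_subsample, replace=False)
            # s_j in {s, 1}: rescaling the penalty of coefficient j is the same
            # as rescaling column j of X before an ordinary Lasso fit
            s = np.where(rng.rand(n_features) < 0.5, scaling, 1.0)
            lasso = Lasso(alpha=alpha, max_iter=10000).fit(X[idx] * s, y[idx])
            selected += lasso.coef_ != 0
        # fraction of resamplings in which each feature was selected
        return selected / n_resampling

Features whose returned fraction stays close to one across resamplings are the ones stability selection would keep.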