1313from ..base import MetaEstimatorMixin
1414from ..base import clone
1515from ..base import is_classifier
16+ from ..externals .joblib import Parallel , delayed
1617from ..model_selection import check_cv
1718from ..model_selection ._validation import _safe_split , _score
1819from ..metrics .scorer import check_scoring
1920from .base import SelectorMixin
2021
2122
def _rfe_single_fit(rfe, estimator, X, y, train, test, scorer):
    """Fit ``rfe`` on one train split and return its per-step scores.

    ``X`` and ``y`` are split with ``_safe_split`` using the ``train`` and
    ``test`` indices. ``rfe._fit`` is then run on the training part with a
    step callback that evaluates ``scorer`` on the held-out part, restricted
    to the feature columns handed to the callback. The ``scores_`` attribute
    of the object returned by ``rfe._fit`` is the return value.
    """
    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)

    def step_score(estimator, features):
        # Evaluate the estimator received by the callback on the test split,
        # keeping only the columns listed in ``features``.
        return _score(estimator, X_test[:, features], y_test, scorer)

    fitted = rfe._fit(X_train, y_train, step_score)
    return fitted.scores_
32+
2233class RFE (BaseEstimator , MetaEstimatorMixin , SelectorMixin ):
2334 """Feature ranking with recursive feature elimination.
2435
@@ -296,6 +307,11 @@ class RFECV(RFE, MetaEstimatorMixin):
296307 verbose : int, default=0
297308 Controls verbosity of output.
298309
310+ n_jobs : int, default=1
311+ Number of cores to run in parallel while fitting across folds.
312+ Defaults to 1 core. If `n_jobs=-1`, the number of jobs is set
313+ to the number of cores.
314+
299315 Attributes
300316 ----------
301317 n_features_ : int
@@ -349,12 +365,14 @@ class RFECV(RFE, MetaEstimatorMixin):
349365 for cancer classification using support vector machines",
350366 Mach. Learn., 46(1-3), 389--422, 2002.
351367 """
def __init__(self, estimator, step=1, cv=None, scoring=None,
             verbose=0, n_jobs=1):
    # Keep each constructor argument on the instance exactly as given;
    # nothing is validated or transformed here.
    self.estimator = estimator
    self.step = step
    self.cv = cv
    self.scoring = scoring
    self.verbose = verbose
    self.n_jobs = n_jobs
358376
359377 def fit (self , X , y ):
360378 """Fit the RFE model and automatically tune the number of selected
@@ -377,23 +395,33 @@ def fit(self, X, y):
377395 scorer = check_scoring (self .estimator , scoring = self .scoring )
378396 n_features = X .shape [1 ]
379397 n_features_to_select = 1
398+ rfe = RFE (estimator = self .estimator ,
399+ n_features_to_select = n_features_to_select ,
400+ step = self .step , verbose = self .verbose - 1 )
401+
380402
381- # Determine the number of subsets of features
382- scores = []
403+ # Determine the number of subsets of features by fitting across
404+ # the train folds and choosing the "features_to_select" parameter
405+ # that gives the least averaged error across all folds.
383406
384- # Cross-validation
385- for n , (train , test ) in enumerate (cv .split (X , y )):
386- X_train , y_train = _safe_split (self .estimator , X , y , train )
387- X_test , y_test = _safe_split (self .estimator , X , y , test , train )
407+ # Note that joblib raises a non-picklable error for bound methods
408+ # even if n_jobs is set to 1 with the default multiprocessing
409+ # backend.
410+ # This branching is done to
411+ # make sure that user code that sets n_jobs to 1
412+ # and provides bound methods as scorers is not broken by the
413+ # addition of the n_jobs parameter in version 0.18.
414+
415+ if self .n_jobs == 1 :
416+ parallel , func = list , _rfe_single_fit
417+ else :
418+ parallel , func , = Parallel (n_jobs = self .n_jobs ), delayed (_rfe_single_fit )
388419
389- rfe = RFE ( estimator = self . estimator ,
390- n_features_to_select = n_features_to_select ,
391- step = self . step , verbose = self . verbose - 1 )
420+ scores = parallel (
421+ func ( rfe , self . estimator , X , y , train , test , scorer )
422+ for train , test in cv . split ( X , y ) )
392423
393- rfe ._fit (X_train , y_train , lambda estimator , features :
394- _score (estimator , X_test [:, features ], y_test , scorer ))
395- scores .append (np .array (rfe .scores_ [::- 1 ]).reshape (1 , - 1 ))
396- scores = np .sum (np .concatenate (scores , 0 ), 0 )
424+ scores = np .sum (scores , axis = 0 )[::- 1 ]
397425 # The index in 'scores' when 'n_features' features are selected
398426 n_feature_index = np .ceil ((n_features - n_features_to_select ) /
399427 float (self .step ))
0 commit comments