Skip to content

Commit 71d00d9

Browse files
committed
ENH use an array instead of a dict in RFECV
1 parent b91f5c0 commit 71d00d9

File tree

1 file changed

+10 -16 lines changed
  • sklearn/feature_selection

1 file changed

+10 -16 lines changed

sklearn/feature_selection/rfe.py

Lines changed: 10 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -334,7 +334,7 @@ def fit(self, X, y):
334334
verbose=self.verbose - 1)
335335

336336
cv = check_cv(self.cv, X, y, is_classifier(self.estimator))
337-
scores = {}
337+
scores = np.zeros(X.shape[1])
338338

339339
# Cross-validation
340340
n = 0
@@ -343,35 +343,32 @@ def fit(self, X, y):
343343
# Compute a full ranking of the features
344344
ranking_ = rfe.fit(X[train], y[train]).ranking_
345345
# Score each subset of features
346-
for k in xrange(1, max(ranking_) + 1):
347-
mask = np.where(ranking_ <= k)[0]
346+
for k in xrange(0, max(ranking_)):
347+
mask = np.where(ranking_ <= k + 1)[0]
348348
estimator = clone(self.estimator)
349349
estimator.fit(X[train][:, mask], y[train])
350350

351351
if self.loss_func is None:
352-
score_k = 1.0 - estimator.score(X[test][:, mask], y[test])
352+
loss_k = 1.0 - estimator.score(X[test][:, mask], y[test])
353353
else:
354-
score_k = self.loss_func(
354+
loss_k = self.loss_func(
355355
y[test], estimator.predict(X[test][:, mask]))
356356

357-
if not k in scores:
358-
scores[k] = 0.0
359-
360357
if self.verbose > 0:
361358
print("Finished fold with %d / %d feature ranks, loss=%f"
362-
% (k, max(ranking_), score_k))
363-
scores[k] += score_k
359+
% (k, max(ranking_), loss_k))
360+
scores[k] += loss_k
364361

365362
n += 1
366363

367364
# Pick the best number of features on average
368365
best_score = np.inf
369366
best_k = None
370367

371-
for k, score in sorted(scores.iteritems()):
368+
for k, score in enumerate(scores):
372369
if score < best_score:
373370
best_score = score
374-
best_k = k
371+
best_k = k + 1
375372

376373
# Re-execute an elimination with best_k over the whole set
377374
rfe = RFE(estimator=self.estimator,
@@ -388,8 +385,5 @@ def fit(self, X, y):
388385
self.support_ = rfe.support_
389386
self.ranking_ = rfe.ranking_
390387

391-
self.cv_scores_ = [0] * len(scores)
392-
for k, score in scores.iteritems():
393-
self.cv_scores_[k - 1] = score / n
394-
388+
self.cv_scores_ = scores / n
395389
return self

0 commit comments

Comments
 (0)