Skip to content

Commit f7310ea

Browse files
venelarsmans
authored and committed
Deprecate vectorizer fixed_vocabulary attribute
1 parent 2510464 commit f7310ea

File tree

2 files changed

+16
-8
lines changed

2 files changed

+16
-8
lines changed

sklearn/feature_extraction/tests/test_text.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -453,7 +453,7 @@ def test_vectorizer():
453453

454454
tv.max_df = v1.max_df
455455
tfidf2 = tv.fit_transform(train_data).toarray()
456-
assert_false(tv.fixed_vocabulary)
456+
assert_false(tv.fixed_vocabulary_)
457457
assert_array_almost_equal(tfidf, tfidf2)
458458

459459
# test the direct tfidf vectorizer with new data
@@ -777,7 +777,7 @@ def test_vectorizer_pipeline_grid_selection():
777777
best_vectorizer = grid_search.best_estimator_.named_steps['vect']
778778
assert_equal(best_vectorizer.ngram_range, (1, 1))
779779
assert_equal(best_vectorizer.norm, 'l2')
780-
assert_false(best_vectorizer.fixed_vocabulary)
780+
assert_false(best_vectorizer.fixed_vocabulary_)
781781

782782

783783
def test_vectorizer_pipeline_cross_validation():
@@ -836,7 +836,7 @@ def test_tfidf_vectorizer_with_fixed_vocabulary():
836836
X_1 = vect.fit_transform(ALL_FOOD_DOCS)
837837
X_2 = vect.transform(ALL_FOOD_DOCS)
838838
assert_array_almost_equal(X_1.toarray(), X_2.toarray())
839-
assert_true(vect.fixed_vocabulary)
839+
assert_true(vect.fixed_vocabulary_)
840840

841841

842842
def test_pickling_vectorizer():

sklearn/feature_extraction/text.py

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,8 @@
2929
from ..preprocessing import normalize
3030
from .hashing import FeatureHasher
3131
from .stop_words import ENGLISH_STOP_WORDS
32-
from sklearn.externals import six
32+
from ..utils import deprecated
33+
from ..externals import six
3334

3435
__all__ = ['CountVectorizer',
3536
'ENGLISH_STOP_WORDS',
@@ -257,10 +258,16 @@ def _check_vocabulary(self):
257258
raise ValueError(msg)
258259
if not vocabulary:
259260
raise ValueError("empty vocabulary passed to fit")
260-
self.fixed_vocabulary = True
261+
self.fixed_vocabulary_ = True
261262
self.vocabulary_ = dict(vocabulary)
262263
else:
263-
self.fixed_vocabulary = False
264+
self.fixed_vocabulary_ = False
265+
266+
@property
267+
@deprecated("The `fixed_vocabulary` attribute is deprecated and will be "
268+
"removed in 0.18. Please use `fixed_vocabulary_` instead.")
269+
def fixed_vocabulary(self):
270+
return self.fixed_vocabulary_
264271

265272

266273
class HashingVectorizer(BaseEstimator, VectorizerMixin):
@@ -810,12 +817,13 @@ def fit_transform(self, raw_documents, y=None):
810817
min_df = self.min_df
811818
max_features = self.max_features
812819

813-
vocabulary, X = self._count_vocab(raw_documents, self.fixed_vocabulary)
820+
vocabulary, X = self._count_vocab(raw_documents,
821+
self.fixed_vocabulary_)
814822

815823
if self.binary:
816824
X.data.fill(1)
817825

818-
if not self.fixed_vocabulary:
826+
if not self.fixed_vocabulary_:
819827
X = self._sort_features(X, vocabulary)
820828

821829
n_doc = X.shape[0]

0 commit comments

Comments
 (0)