Deprecate vectorizer fixed_vocabulary attribute

vene · larsmans · commit f7310eafe5bf · 2014-09-02T10:42:09.000+02:00
diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py
@@ -453,7 +453,7 @@ def test_vectorizer():
 
     tv.max_df = v1.max_df
     tfidf2 = tv.fit_transform(train_data).toarray()
-    assert_false(tv.fixed_vocabulary)
+    assert_false(tv.fixed_vocabulary_)
     assert_array_almost_equal(tfidf, tfidf2)
 
     # test the direct tfidf vectorizer with new data
@@ -777,7 +777,7 @@ def test_vectorizer_pipeline_grid_selection():
     best_vectorizer = grid_search.best_estimator_.named_steps['vect']
     assert_equal(best_vectorizer.ngram_range, (1, 1))
     assert_equal(best_vectorizer.norm, 'l2')
-    assert_false(best_vectorizer.fixed_vocabulary)
+    assert_false(best_vectorizer.fixed_vocabulary_)
 
 
 def test_vectorizer_pipeline_cross_validation():
@@ -836,7 +836,7 @@ def test_tfidf_vectorizer_with_fixed_vocabulary():
     X_1 = vect.fit_transform(ALL_FOOD_DOCS)
     X_2 = vect.transform(ALL_FOOD_DOCS)
     assert_array_almost_equal(X_1.toarray(), X_2.toarray())
-    assert_true(vect.fixed_vocabulary)
+    assert_true(vect.fixed_vocabulary_)
 
 
 def test_pickling_vectorizer():
diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py
@@ -29,7 +29,8 @@
 from ..preprocessing import normalize
 from .hashing import FeatureHasher
 from .stop_words import ENGLISH_STOP_WORDS
-from sklearn.externals import six
+from ..utils import deprecated
+from ..externals import six
 
 __all__ = ['CountVectorizer',
            'ENGLISH_STOP_WORDS',
@@ -257,10 +258,16 @@ def _check_vocabulary(self):
                         raise ValueError(msg)
             if not vocabulary:
                 raise ValueError("empty vocabulary passed to fit")
-            self.fixed_vocabulary = True
+            self.fixed_vocabulary_ = True
             self.vocabulary_ = dict(vocabulary)
         else:
-            self.fixed_vocabulary = False
+            self.fixed_vocabulary_ = False
+
+    @property
+    @deprecated("The `fixed_vocabulary` attribute is deprecated and will be "
+                "removed in 0.18.  Please use `fixed_vocabulary_` instead.")
+    def fixed_vocabulary(self):
+        return self.fixed_vocabulary_
 
 
 class HashingVectorizer(BaseEstimator, VectorizerMixin):
@@ -810,12 +817,13 @@ def fit_transform(self, raw_documents, y=None):
         min_df = self.min_df
         max_features = self.max_features
 
-        vocabulary, X = self._count_vocab(raw_documents, self.fixed_vocabulary)
+        vocabulary, X = self._count_vocab(raw_documents,
+                                          self.fixed_vocabulary_)
 
         if self.binary:
             X.data.fill(1)
 
-        if not self.fixed_vocabulary:
+        if not self.fixed_vocabulary_:
             X = self._sort_features(X, vocabulary)
 
             n_doc = X.shape[0]