Skip to content

Commit 1adfed9

Browse files
committed
Merge pull request scikit-learn#1656 from rlmv/idf_diag
Fix AttributeError caused by an unlearned idf vector
2 parents 7d4034a + fcca1df commit 1adfed9

File tree

2 files changed

+6
-0
lines changed

2 files changed

+6
-0
lines changed

sklearn/feature_extraction/tests/test_text.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -389,6 +389,10 @@ def test_vectorizer():
389 | 389 |      t2 = TfidfTransformer(norm='l1', use_idf=False)
390 | 390 |      tf = t2.fit(counts_train).transform(counts_train).toarray()
391 | 391 |      assert_equal(t2.idf_, None)
    | 392 | +
    | 393 | +    # test idf transform with unlearned idf vector
    | 394 | +    t3 = TfidfTransformer(use_idf=True)
    | 395 | +    assert_raises(ValueError, t3.transform, counts_train)
392 | 396 |
393 | 397 |      # L1-normalized term frequencies sum to one
394 | 398 |      assert_array_almost_equal(np.sum(tf, axis=1), [1.0] * n_train)

sklearn/feature_extraction/text.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -915,6 +915,8 @@ def transform(self, X, copy=True):
915915
X.data += 1
916916

917917
if self.use_idf:
918+
if not hasattr(self, "_idf_diag"):
919+
raise ValueError("idf vector not fitted")
918920
expected_n_features = self._idf_diag.shape[0]
919921
if n_features != expected_n_features:
920922
raise ValueError("Input has n_features=%d while the model"

0 commit comments

Comments
 (0)