Merge pull request scikit-learn#4824 from amueller/testing_less_warnings

jnothman · jnothman · commit 8b44fa197d4f · 2015-06-09T22:30:22.000+10:00
Minor fixes to the tests; avoid raising as many warnings in the test suite.
diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py
@@ -24,7 +24,6 @@
 from sklearn.utils.testing import assert_greater_equal
 from sklearn.utils.testing import assert_raises
 from sklearn.utils.testing import assert_warns
-from sklearn.utils.testing import assert_warns_message
 from sklearn.utils.testing import ignore_warnings
 
 from sklearn import datasets
@@ -415,10 +414,8 @@ def test_classes_shape():
 
 
 def test_random_trees_dense_type():
-    '''
-    Test that the `sparse_output` parameter of RandomTreesEmbedding
-    works by returning a dense array.
-    '''
+    # Test that the `sparse_output` parameter of RandomTreesEmbedding
+    # works by returning a dense array.
 
     # Create the RTE with sparse=False
     hasher = RandomTreesEmbedding(n_estimators=10, sparse_output=False)
@@ -430,11 +427,8 @@ def test_random_trees_dense_type():
 
 
 def test_random_trees_dense_equal():
-    '''
-    Test that the `sparse_output` parameter of RandomTreesEmbedding
-    works by returning the same array for both argument
-    values.
-    '''
+    # Test that the `sparse_output` parameter of RandomTreesEmbedding
+    # works by returning the same array for both argument values.
 
     # Create the RTEs
     hasher_dense = RandomTreesEmbedding(n_estimators=10, sparse_output=False,
@@ -807,8 +801,7 @@ def check_class_weight_balanced_and_bootstrap_multi_output(name):
     clf = ForestClassifier(class_weight='balanced_subsample', random_state=0)
     clf.fit(X, _y)
     clf = ForestClassifier(class_weight='subsample', random_state=0)
-    #assert_warns_message(DeprecationWarning, "balanced_subsample", clf.fit, X, _y)
-    clf.fit(X, _y)
+    ignore_warnings(clf.fit)(X, _y)
 
 
 def test_class_weight_balanced_and_bootstrap_multi_output():
diff --git a/sklearn/metrics/cluster/tests/test_supervised.py b/sklearn/metrics/cluster/tests/test_supervised.py
@@ -109,7 +109,7 @@ def test_non_consicutive_labels():
 
 def uniform_labelings_scores(score_func, n_samples, k_range, n_runs=10,
                              seed=42):
-    """Compute score for random uniform cluster labelings"""
+    # Compute score for random uniform cluster labelings
     random_labels = np.random.RandomState(seed).random_integers
     scores = np.zeros((len(k_range), n_runs))
     for i, k in enumerate(k_range):
@@ -121,7 +121,7 @@ def uniform_labelings_scores(score_func, n_samples, k_range, n_runs=10,
 
 
 def test_adjustment_for_chance():
-    """Check that adjusted scores are almost zero on random labels"""
+    # Check that adjusted scores are almost zero on random labels
     n_clusters_range = [2, 10, 50, 90]
     n_samples = 100
     n_runs = 10
@@ -134,7 +134,7 @@ def test_adjustment_for_chance():
 
 
 def test_adjusted_mutual_info_score():
-    """Compute the Adjusted Mutual Information and test against known values"""
+    # Compute the Adjusted Mutual Information and test against known values
     labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
     labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
     # Mutual information
@@ -177,7 +177,7 @@ def test_contingency_matrix():
 
 
 def test_exactly_zero_info_score():
-    """Check numerical stability when information is exactly zero"""
+    # Check numerical stability when information is exactly zero
     for i in np.logspace(1, 4, 4).astype(np.int):
         labels_a, labels_b = np.ones(i, dtype=np.int),\
             np.arange(i, dtype=np.int)
@@ -188,7 +188,7 @@ def test_exactly_zero_info_score():
 
 
 def test_v_measure_and_mutual_information(seed=36):
-    """Check relation between v_measure, entropy and mutual information"""
+    # Check relation between v_measure, entropy and mutual information
     for i in np.logspace(1, 4, 4).astype(np.int):
         random_state = np.random.RandomState(seed)
         labels_a, labels_b = random_state.random_integers(0, 10, i),\
diff --git a/sklearn/metrics/cluster/tests/test_unsupervised.py b/sklearn/metrics/cluster/tests/test_unsupervised.py
@@ -9,7 +9,7 @@
 
 
 def test_silhouette():
-    """Tests the Silhouette Coefficient. """
+    # Tests the Silhouette Coefficient.
     dataset = datasets.load_iris()
     X = dataset.data
     y = dataset.target
@@ -39,10 +39,8 @@ def test_silhouette():
 
 
 def test_no_nan():
-    """Assert Silhouette Coefficient != nan when there is 1 sample in a class.
-
-        This tests for the condition that caused issue 960.
-    """
+    # Assert Silhouette Coefficient != nan when there is 1 sample in a class.
+    # This tests for the condition that caused issue 960.
     # Note that there is only one sample in cluster 0. This used to cause the
     # silhouette_score to return nan (see bug #960).
     labels = np.array([1, 0, 1, 1, 1])
@@ -53,7 +51,7 @@ def test_no_nan():
 
 
 def test_correct_labelsize():
-    """Assert 1 < n_labels < n_samples"""
+    # Assert 1 < n_labels < n_samples
     dataset = datasets.load_iris()
     X = dataset.data
 
diff --git a/sklearn/metrics/cluster/unsupervised.py b/sklearn/metrics/cluster/unsupervised.py
@@ -161,8 +161,7 @@ def silhouette_samples(X, labels, metric='euclidean', **kwds):
     B = np.array([_nearest_cluster_distance(distances[i], labels, i)
                   for i in range(n)])
     sil_samples = (B - A) / np.maximum(A, B)
-    # nan values are for clusters of size 1, and should be 0
-    return np.nan_to_num(sil_samples)
+    return sil_samples
 
 
 def _intra_cluster_distance(distances_row, labels, i):
@@ -187,6 +186,9 @@ def _intra_cluster_distance(distances_row, labels, i):
     """
     mask = labels == labels[i]
     mask[i] = False
+    if not np.any(mask):
+        # cluster of size 1
+        return 0
     a = np.mean(distances_row[mask])
     return a
 
diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py
@@ -23,6 +23,7 @@
 from sklearn.utils.testing import assert_greater, assert_in, assert_less
 from sklearn.utils.testing import assert_raises_regexp, assert_warns
 from sklearn.utils.testing import assert_warns_message, assert_raise_message
+from sklearn.utils.testing import ignore_warnings
 
 # toy sample
 X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
@@ -793,6 +794,8 @@ def test_unfitted():
                          clf.predict, X)
 
 
+# ignore convergence warnings from max_iter=1
+@ignore_warnings
 def test_consistent_proba():
     a = svm.SVC(probability=True, max_iter=1, random_state=0)
     proba_1 = a.fit(X, Y).predict_proba(X)
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
@@ -147,7 +147,6 @@ def _yield_clustering_checks(name, Clusterer):
 
 
 def _yield_all_checks(name, Estimator):
-    #yield check_parameters_default_constructible, name, Estimator
     for check in _yield_non_meta_checks(name, Estimator):
         yield check
     if issubclass(Estimator, ClassifierMixin):
@@ -795,6 +794,7 @@ def check_estimators_fit_returns_self(name, Estimator):
     assert_true(estimator.fit(X, y) is estimator)
 
 
+@ignore_warnings
 def check_estimators_unfitted(name, Estimator):
     """Check that predict raises an exception in an unfitted estimator.