Merge pull request scikit-learn#5063 from amueller/bagging_input_validation

jnothman · jnothman · commit 070663634ae9 · 2015-08-27T12:35:49.000+10:00
[MRG] test for accepted sparse matrix types
diff --git a/sklearn/ensemble/bagging.py b/sklearn/ensemble/bagging.py
@@ -251,7 +251,7 @@ def fit(self, X, y, sample_weight=None):
         random_state = check_random_state(self.random_state)
 
         # Convert data
-        X, y = check_X_y(X, y, ['csr', 'csc', 'coo'])
+        X, y = check_X_y(X, y, ['csr', 'csc'])
 
         # Remap output
         n_samples, self.n_features_ = X.shape
@@ -587,7 +587,7 @@ def predict_proba(self, X):
         """
         check_is_fitted(self, "classes_")
         # Check data
-        X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])
+        X = check_array(X, accept_sparse=['csr', 'csc'])
 
         if self.n_features_ != X.shape[1]:
             raise ValueError("Number of features of the model must "
@@ -865,7 +865,7 @@ def predict(self, X):
         """
         check_is_fitted(self, "estimators_features_")
         # Check data
-        X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])
+        X = check_array(X, accept_sparse=['csr', 'csc'])
 
         # Parallel loop
         n_jobs, n_estimators, starts = _partition_estimators(self.n_estimators,
diff --git a/sklearn/feature_selection/univariate_selection.py b/sklearn/feature_selection/univariate_selection.py
@@ -319,7 +319,7 @@ def fit(self, X, y):
         self : object
             Returns self.
         """
-        X, y = check_X_y(X, y, ['csr', 'csc', 'coo'])
+        X, y = check_X_y(X, y, ['csr', 'csc'])
 
         if not callable(self.score_func):
             raise TypeError("The score function should be a callable, %s (%s) "
diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py
@@ -1074,7 +1074,8 @@ def fit(self, X, y):
             # by the model fitting itself
             X = check_array(X, 'csc', copy=False)
             if sparse.isspmatrix(X):
-                if not np.may_share_memory(reference_to_old_X.data, X.data):
+                if (hasattr(reference_to_old_X, "data") and
+                        not np.may_share_memory(reference_to_old_X.data, X.data)):
                     # X is a sparse matrix and has been copied
                     copy_X = False
             elif not np.may_share_memory(reference_to_old_X, X):
diff --git a/sklearn/linear_model/randomized_l1.py b/sklearn/linear_model/randomized_l1.py
@@ -88,7 +88,7 @@ def fit(self, X, y):
         self : object
             Returns an instance of self.
         """
-        X, y = check_X_y(X, y, ['csr', 'csc', 'coo'], y_numeric=True)
+        X, y = check_X_y(X, y, ['csr', 'csc'], y_numeric=True)
         X = as_float_array(X, copy=False)
         n_samples, n_features = X.shape
 
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
@@ -269,34 +269,38 @@ def check_estimator_sparse_data(name, Estimator):
     rng = np.random.RandomState(0)
     X = rng.rand(40, 10)
     X[X < .8] = 0
-    X = sparse.csr_matrix(X)
+    X_csr = sparse.csr_matrix(X)
     y = (4 * rng.rand(40)).astype(np.int)
-    # catch deprecation warnings
-    with warnings.catch_warnings():
-        if name in ['Scaler', 'StandardScaler']:
-            estimator = Estimator(with_mean=False)
-        else:
-            estimator = Estimator()
-    set_fast_parameters(estimator)
-    # fit and predict
-    try:
-        estimator.fit(X, y)
-        if hasattr(estimator, "predict"):
-            estimator.predict(X)
-        if hasattr(estimator, 'predict_proba'):
-            estimator.predict_proba(X)
-    except TypeError as e:
-        if 'sparse' not in repr(e):
+    for sparse_format in ['csr', 'csc', 'dok', 'lil', 'coo', 'dia', 'bsr']:
+        X = X_csr.asformat(sparse_format)
+        # catch deprecation warnings
+        with warnings.catch_warnings():
+            if name in ['Scaler', 'StandardScaler']:
+                estimator = Estimator(with_mean=False)
+            else:
+                estimator = Estimator()
+        set_fast_parameters(estimator)
+        # fit and predict
+        try:
+            estimator.fit(X, y)
+            if hasattr(estimator, "predict"):
+                pred = estimator.predict(X)
+                assert_equal(pred.shape, (X.shape[0],))
+            if hasattr(estimator, 'predict_proba'):
+                probs = estimator.predict_proba(X)
+                assert_equal(probs.shape, (X.shape[0], 4))
+        except TypeError as e:
+            if 'sparse' not in repr(e):
+                print("Estimator %s doesn't seem to fail gracefully on "
+                      "sparse data: error message state explicitly that "
+                      "sparse input is not supported if this is not the case."
+                      % name)
+                raise
+        except Exception:
             print("Estimator %s doesn't seem to fail gracefully on "
-                  "sparse data: error message state explicitly that "
-                  "sparse input is not supported if this is not the case."
-                  % name)
+                  "sparse data: it should raise a TypeError if sparse input "
+                  "is explicitly not supported." % name)
             raise
-    except Exception:
-        print("Estimator %s doesn't seem to fail gracefully on "
-              "sparse data: it should raise a TypeError if sparse input "
-              "is explicitly not supported." % name)
-        raise
 
 
 def check_dtype_object(name, Estimator):