|
1 | | -""" |
2 | | -Testing for the boost module (sklearn.ensemble.boost). |
3 | | -""" |
| 1 | +"""Testing for the boost module (sklearn.ensemble.boost).""" |
4 | 2 |
|
5 | 3 | import numpy as np |
6 | 4 | from numpy.testing import assert_array_equal, assert_array_less |
|
15 | 13 | from sklearn.svm import SVC, SVR |
16 | 14 | from sklearn.utils import shuffle |
17 | 15 | from sklearn.cross_validation import train_test_split |
18 | | -from scipy.sparse import csc_matrix, csr_matrix |
| 16 | +from scipy.sparse import csc_matrix, csr_matrix, coo_matrix, dok_matrix |
| 17 | +from scipy.sparse import lil_matrix |
19 | 18 | from sklearn import datasets |
20 | 19 |
|
21 | 20 |
|
@@ -243,109 +242,91 @@ def test_base_estimator(): |
243 | 242 |
|
244 | 243 |
|
245 | 244 | def test_sparse_classification(): |
246 | | - """Check classification for various parameter settings on sparse input.""" |
247 | | - |
| 245 | + """Check classification on sparse input.""" |
248 | 246 | class CustomSVC(SVC): |
249 | | - """SVC variant that records the nature of the training set""" |
| 247 | + |
| 248 | + """SVC variant that records the nature of the training set.""" |
250 | 249 |
|
251 | 250 | def fit(self, X, y, sample_weight=None): |
| 251 | + """Modification on fit carries data type for later verification.""" |
252 | 252 | super(CustomSVC, self).fit(X, y, sample_weight=sample_weight) |
253 | 253 | self.data_type_ = type(X) |
254 | 254 | return self |
255 | 255 |
|
256 | | - X_train, X_test, y_train, y_test = train_test_split(iris.data, |
257 | | - iris.target, |
258 | | - random_state=rng) |
259 | | - parameter_sets = [ |
260 | | - {"learning_rate": 0.5}, |
261 | | - {"learning_rate": 1.0}, |
262 | | - ] |
| 256 | + X, y = datasets.make_classification() |
| 257 | + |
| 258 | + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) |
263 | 259 |
|
264 | | - for sparse_format in [csc_matrix, csr_matrix]: |
| 260 | + for sparse_format in [csc_matrix, csr_matrix, lil_matrix, coo_matrix, |
| 261 | + dok_matrix]: |
265 | 262 | X_train_sparse = sparse_format(X_train) |
266 | 263 | X_test_sparse = sparse_format(X_test) |
267 | 264 |
|
268 | | - for params in parameter_sets: |
| 265 | + # Trained on sparse format |
| 266 | + sparse_classifier = AdaBoostClassifier( |
| 267 | + base_estimator=CustomSVC(), |
| 268 | + random_state=1, |
| 269 | + algorithm="SAMME" |
| 270 | + ).fit(X_train_sparse, y_train) |
| 271 | + sparse_results = sparse_classifier.predict(X_test_sparse) |
269 | 272 |
|
270 | | - # Trained on sparse format |
271 | | - sparse_classifier = AdaBoostClassifier( |
272 | | - base_estimator=CustomSVC(), |
273 | | - random_state=1, |
274 | | - algorithm="SAMME", |
275 | | - **params |
276 | | - ).fit(X_train_sparse, y_train) |
277 | | - sparse_results = sparse_classifier.predict(X_test_sparse) |
| 273 | + # Trained on dense format |
| 274 | + dense_results = AdaBoostClassifier( |
| 275 | + base_estimator=CustomSVC(), |
| 276 | + random_state=1, |
| 277 | + algorithm="SAMME" |
| 278 | + ).fit(X_train, y_train).predict(X_test) |
278 | 279 |
|
279 | | - # Trained on dense format |
280 | | - dense_results = AdaBoostClassifier( |
281 | | - base_estimator=CustomSVC(), |
282 | | - random_state=1, |
283 | | - algorithm="SAMME", |
284 | | - **params |
285 | | - ).fit(X_train, y_train).predict(X_test) |
| 280 | + sparse_type = type(X_train_sparse) |
| 281 | + types = [i.data_type_ for i in sparse_classifier.estimators_] |
286 | 282 |
|
287 | | - sparse_type = type(X_train_sparse) |
288 | | - types = [i.data_type_ for i in sparse_classifier.estimators_] |
289 | | - |
290 | | - assert_array_equal(sparse_results, dense_results) |
291 | | - assert all([t == sparse_type for t in types]) |
| 283 | + assert_array_equal(sparse_results, dense_results) |
| 284 | + print(types) |
| 285 | + assert all([(t == csc_matrix or t == csr_matrix) |
| 286 | + for t in types]) |
292 | 287 |
|
293 | 288 |
|
294 | 289 | def test_sparse_regression(): |
295 | | - """Check regression for various parameter settings on sparse input.""" |
296 | | - |
| 290 | + """Check regression on sparse input.""" |
297 | 291 | class CustomSVR(SVR): |
298 | | - """SVR variant that records the nature of the training set""" |
| 292 | + |
| 293 | + """SVR variant that records the nature of the training set.""" |
299 | 294 |
|
300 | 295 | def fit(self, X, y, sample_weight=None): |
| 296 | + """Modification on fit carries data type for later verification.""" |
301 | 297 | super(CustomSVR, self).fit(X, y, sample_weight=sample_weight) |
302 | 298 | self.data_type_ = type(X) |
303 | 299 | return self |
304 | 300 |
|
305 | | - X_train, X_test, y_train, y_test = train_test_split(boston.data[:50], |
306 | | - boston.target[:50], |
307 | | - random_state=rng) |
308 | | - parameter_sets = [ |
309 | | - {"learning_rate": 0.5, |
310 | | - "loss": 'linear'}, |
311 | | - {"learning_rate": 1.0, |
312 | | - "loss": 'linear'}, |
313 | | - {"learning_rate": 0.5, |
314 | | - "loss": 'square'}, |
315 | | - {"learning_rate": 1.0, |
316 | | - "loss": 'square'}, |
317 | | - {"learning_rate": 0.5, |
318 | | - "loss": 'exponential'}, |
319 | | - {"learning_rate": 1.0, |
320 | | - "loss": 'exponential'}, |
321 | | - ] |
322 | | - |
323 | | - for sparse_format in [csc_matrix, csr_matrix]: |
324 | | - X_train_sparse = sparse_format(X_train) |
325 | | - X_test_sparse = sparse_format(X_test) |
326 | | - |
327 | | - for params in parameter_sets: |
| 301 | + X, y = datasets.make_regression() |
328 | 302 |
|
329 | | - # Trained on sparse format |
330 | | - sparse_classifier = AdaBoostRegressor( |
331 | | - base_estimator=CustomSVR(), |
332 | | - random_state=1, |
333 | | - **params |
334 | | - ).fit(X_train_sparse, y_train) |
335 | | - sparse_results = sparse_classifier.predict(X_test_sparse) |
| 303 | + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) |
336 | 304 |
|
337 | | - # Trained on dense format |
338 | | - dense_results = AdaBoostRegressor( |
339 | | - base_estimator=CustomSVR(), |
340 | | - random_state=1, |
341 | | - **params |
342 | | - ).fit(X_train, y_train).predict(X_test) |
343 | | - |
344 | | - sparse_type = type(X_train_sparse) |
345 | | - types = [i.data_type_ for i in sparse_classifier.estimators_] |
| 305 | + for sparse_format in [csc_matrix, csr_matrix, lil_matrix, coo_matrix, |
| 306 | + dok_matrix]: |
| 307 | + X_train_sparse = sparse_format(X_train) |
| 308 | + X_test_sparse = sparse_format(X_test) |
346 | 309 |
|
347 | | - assert_array_equal(sparse_results, dense_results) |
348 | | - assert all([t == sparse_type for t in types]) |
| 310 | + # Trained on sparse format |
| 311 | + sparse_classifier = AdaBoostRegressor( |
| 312 | + base_estimator=CustomSVR(), |
| 313 | + random_state=1 |
| 314 | + ).fit(X_train_sparse, y_train) |
| 315 | + sparse_results = sparse_classifier.predict(X_test_sparse) |
| 316 | + |
| 317 | + # Trained on dense format |
| 318 | + dense_results = AdaBoostRegressor( |
| 319 | + base_estimator=CustomSVR(), |
| 320 | + random_state=1 |
| 321 | + ).fit(X_train, y_train).predict(X_test) |
| 322 | + |
| 323 | + sparse_type = type(X_train_sparse) |
| 324 | + types = [i.data_type_ for i in sparse_classifier.estimators_] |
| 325 | + |
| 326 | + assert_array_equal(sparse_results, dense_results) |
| 327 | + print(types) |
| 328 | + assert all([(t == csc_matrix or t == csr_matrix) |
| 329 | + for t in types]) |
349 | 330 |
|
350 | 331 | if __name__ == "__main__": |
351 | 332 | import nose |
|
0 commit comments