Skip to content

Commit ea8f194

Browse files
committed
In test_sparse_classification() and test_sparse_regression(), added tests for coo, lil, and dok sparse matrices. Removed the parameter-set feature since there are no parameters that alter sparse/dense data usage. Revised the data set to use datasets.make_{regression/classification} for improved test time.
1 parent bcdc26d commit ea8f194

File tree

1 file changed

+61
-80
lines changed

1 file changed

+61
-80
lines changed

sklearn/ensemble/tests/test_weight_boosting.py

Lines changed: 61 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
"""
2-
Testing for the boost module (sklearn.ensemble.boost).
3-
"""
1+
"""Testing for the boost module (sklearn.ensemble.boost)."""
42

53
import numpy as np
64
from numpy.testing import assert_array_equal, assert_array_less
@@ -15,7 +13,8 @@
1513
from sklearn.svm import SVC, SVR
1614
from sklearn.utils import shuffle
1715
from sklearn.cross_validation import train_test_split
18-
from scipy.sparse import csc_matrix, csr_matrix
16+
from scipy.sparse import csc_matrix, csr_matrix, coo_matrix, dok_matrix
17+
from scipy.sparse import lil_matrix
1918
from sklearn import datasets
2019

2120

@@ -243,109 +242,91 @@ def test_base_estimator():
243242

244243

245244
def test_sparse_classification():
246-
"""Check classification for various parameter settings on sparse input."""
247-
245+
"""Check classification on sparse input."""
248246
class CustomSVC(SVC):
249-
"""SVC variant that records the nature of the training set"""
247+
248+
"""SVC variant that records the nature of the training set."""
250249

251250
def fit(self, X, y, sample_weight=None):
251+
"""Modification on fit carries data type for later verification."""
252252
super(CustomSVC, self).fit(X, y, sample_weight=sample_weight)
253253
self.data_type_ = type(X)
254254
return self
255255

256-
X_train, X_test, y_train, y_test = train_test_split(iris.data,
257-
iris.target,
258-
random_state=rng)
259-
parameter_sets = [
260-
{"learning_rate": 0.5},
261-
{"learning_rate": 1.0},
262-
]
256+
X, y = datasets.make_classification()
257+
258+
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
263259

264-
for sparse_format in [csc_matrix, csr_matrix]:
260+
for sparse_format in [csc_matrix, csr_matrix, lil_matrix, coo_matrix,
261+
dok_matrix]:
265262
X_train_sparse = sparse_format(X_train)
266263
X_test_sparse = sparse_format(X_test)
267264

268-
for params in parameter_sets:
265+
# Trained on sparse format
266+
sparse_classifier = AdaBoostClassifier(
267+
base_estimator=CustomSVC(),
268+
random_state=1,
269+
algorithm="SAMME"
270+
).fit(X_train_sparse, y_train)
271+
sparse_results = sparse_classifier.predict(X_test_sparse)
269272

270-
# Trained on sparse format
271-
sparse_classifier = AdaBoostClassifier(
272-
base_estimator=CustomSVC(),
273-
random_state=1,
274-
algorithm="SAMME",
275-
**params
276-
).fit(X_train_sparse, y_train)
277-
sparse_results = sparse_classifier.predict(X_test_sparse)
273+
# Trained on dense format
274+
dense_results = AdaBoostClassifier(
275+
base_estimator=CustomSVC(),
276+
random_state=1,
277+
algorithm="SAMME"
278+
).fit(X_train, y_train).predict(X_test)
278279

279-
# Trained on dense format
280-
dense_results = AdaBoostClassifier(
281-
base_estimator=CustomSVC(),
282-
random_state=1,
283-
algorithm="SAMME",
284-
**params
285-
).fit(X_train, y_train).predict(X_test)
280+
sparse_type = type(X_train_sparse)
281+
types = [i.data_type_ for i in sparse_classifier.estimators_]
286282

287-
sparse_type = type(X_train_sparse)
288-
types = [i.data_type_ for i in sparse_classifier.estimators_]
289-
290-
assert_array_equal(sparse_results, dense_results)
291-
assert all([t == sparse_type for t in types])
283+
assert_array_equal(sparse_results, dense_results)
284+
print(types)
285+
assert all([(t == csc_matrix or t == csr_matrix)
286+
for t in types])
292287

293288

294289
def test_sparse_regression():
295-
"""Check regression for various parameter settings on sparse input."""
296-
290+
"""Check regression on sparse input."""
297291
class CustomSVR(SVR):
298-
"""SVR variant that records the nature of the training set"""
292+
293+
"""SVR variant that records the nature of the training set."""
299294

300295
def fit(self, X, y, sample_weight=None):
296+
"""Modification on fit carries data type for later verification."""
301297
super(CustomSVR, self).fit(X, y, sample_weight=sample_weight)
302298
self.data_type_ = type(X)
303299
return self
304300

305-
X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
306-
boston.target[:50],
307-
random_state=rng)
308-
parameter_sets = [
309-
{"learning_rate": 0.5,
310-
"loss": 'linear'},
311-
{"learning_rate": 1.0,
312-
"loss": 'linear'},
313-
{"learning_rate": 0.5,
314-
"loss": 'square'},
315-
{"learning_rate": 1.0,
316-
"loss": 'square'},
317-
{"learning_rate": 0.5,
318-
"loss": 'exponential'},
319-
{"learning_rate": 1.0,
320-
"loss": 'exponential'},
321-
]
322-
323-
for sparse_format in [csc_matrix, csr_matrix]:
324-
X_train_sparse = sparse_format(X_train)
325-
X_test_sparse = sparse_format(X_test)
326-
327-
for params in parameter_sets:
301+
X, y = datasets.make_regression()
328302

329-
# Trained on sparse format
330-
sparse_classifier = AdaBoostRegressor(
331-
base_estimator=CustomSVR(),
332-
random_state=1,
333-
**params
334-
).fit(X_train_sparse, y_train)
335-
sparse_results = sparse_classifier.predict(X_test_sparse)
303+
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
336304

337-
# Trained on dense format
338-
dense_results = AdaBoostRegressor(
339-
base_estimator=CustomSVR(),
340-
random_state=1,
341-
**params
342-
).fit(X_train, y_train).predict(X_test)
343-
344-
sparse_type = type(X_train_sparse)
345-
types = [i.data_type_ for i in sparse_classifier.estimators_]
305+
for sparse_format in [csc_matrix, csr_matrix, lil_matrix, coo_matrix,
306+
dok_matrix]:
307+
X_train_sparse = sparse_format(X_train)
308+
X_test_sparse = sparse_format(X_test)
346309

347-
assert_array_equal(sparse_results, dense_results)
348-
assert all([t == sparse_type for t in types])
310+
# Trained on sparse format
311+
sparse_classifier = AdaBoostRegressor(
312+
base_estimator=CustomSVR(),
313+
random_state=1
314+
).fit(X_train_sparse, y_train)
315+
sparse_results = sparse_classifier.predict(X_test_sparse)
316+
317+
# Trained on dense format
318+
dense_results = AdaBoostRegressor(
319+
base_estimator=CustomSVR(),
320+
random_state=1
321+
).fit(X_train, y_train).predict(X_test)
322+
323+
sparse_type = type(X_train_sparse)
324+
types = [i.data_type_ for i in sparse_classifier.estimators_]
325+
326+
assert_array_equal(sparse_results, dense_results)
327+
print(types)
328+
assert all([(t == csc_matrix or t == csr_matrix)
329+
for t in types])
349330

350331
if __name__ == "__main__":
351332
import nose

0 commit comments

Comments
 (0)