55
66import pytest
77import numpy as np
8+ import scipy .sparse as sparse
89
910from sklearn .base import BaseEstimator
1011from sklearn .base import ClassifierMixin
3839from sklearn .model_selection import KFold
3940
4041from sklearn .utils ._testing import assert_allclose
42+ from sklearn .utils ._testing import assert_allclose_dense_sparse
4143from sklearn .utils ._testing import ignore_warnings
4244from sklearn .utils .estimator_checks import check_estimator
4345from sklearn .utils .estimator_checks import check_no_attributes_set_in_init
5254@pytest .mark .parametrize (
5355 "final_estimator" , [None , RandomForestClassifier (random_state = 42 )]
5456)
55- def test_stacking_classifier_iris (cv , final_estimator ):
57+ @pytest .mark .parametrize ("passthrough" , [False , True ])
58+ def test_stacking_classifier_iris (cv , final_estimator , passthrough ):
5659 # prescale the data to avoid convergence warning without using a pipeline
5760 # for later assert
5861 X_train , X_test , y_train , y_test = train_test_split (
5962 scale (X_iris ), y_iris , stratify = y_iris , random_state = 42
6063 )
6164 estimators = [('lr' , LogisticRegression ()), ('svc' , LinearSVC ())]
6265 clf = StackingClassifier (
63- estimators = estimators , final_estimator = final_estimator , cv = cv
66+ estimators = estimators , final_estimator = final_estimator , cv = cv ,
67+ passthrough = passthrough
6468 )
6569 clf .fit (X_train , y_train )
6670 clf .predict (X_test )
6771 clf .predict_proba (X_test )
6872 assert clf .score (X_test , y_test ) > 0.8
6973
7074 X_trans = clf .transform (X_test )
71- assert X_trans .shape [1 ] == 6
75+ expected_column_count = 10 if passthrough else 6
76+ assert X_trans .shape [1 ] == expected_column_count
77+ if passthrough :
78+ assert_allclose (X_test , X_trans [:, - 4 :])
7279
7380 clf .set_params (lr = 'drop' )
7481 clf .fit (X_train , y_train )
@@ -79,7 +86,10 @@ def test_stacking_classifier_iris(cv, final_estimator):
7986 clf .decision_function (X_test )
8087
8188 X_trans = clf .transform (X_test )
82- assert X_trans .shape [1 ] == 3
89+ expected_column_count_drop = 7 if passthrough else 3
90+ assert X_trans .shape [1 ] == expected_column_count_drop
91+ if passthrough :
92+ assert_allclose (X_test , X_trans [:, - 4 :])
8393
8494
8595def test_stacking_classifier_drop_column_binary_classification ():
@@ -161,15 +171,18 @@ def test_stacking_regressor_drop_estimator():
161171 (RandomForestRegressor (random_state = 42 ), {}),
162172 (DummyRegressor (), {'return_std' : True })]
163173)
164- def test_stacking_regressor_diabetes (cv , final_estimator , predict_params ):
174+ @pytest .mark .parametrize ("passthrough" , [False , True ])
175+ def test_stacking_regressor_diabetes (cv , final_estimator , predict_params ,
176+ passthrough ):
165177 # prescale the data to avoid convergence warning without using a pipeline
166178 # for later assert
167179 X_train , X_test , y_train , _ = train_test_split (
168180 scale (X_diabetes ), y_diabetes , random_state = 42
169181 )
170182 estimators = [('lr' , LinearRegression ()), ('svr' , LinearSVR ())]
171183 reg = StackingRegressor (
172- estimators = estimators , final_estimator = final_estimator , cv = cv
184+ estimators = estimators , final_estimator = final_estimator , cv = cv ,
185+ passthrough = passthrough
173186 )
174187 reg .fit (X_train , y_train )
175188 result = reg .predict (X_test , ** predict_params )
@@ -178,14 +191,58 @@ def test_stacking_regressor_diabetes(cv, final_estimator, predict_params):
178191 assert len (result ) == expected_result_length
179192
180193 X_trans = reg .transform (X_test )
181- assert X_trans .shape [1 ] == 2
194+ expected_column_count = 12 if passthrough else 2
195+ assert X_trans .shape [1 ] == expected_column_count
196+ if passthrough :
197+ assert_allclose (X_test , X_trans [:, - 10 :])
182198
183199 reg .set_params (lr = 'drop' )
184200 reg .fit (X_train , y_train )
185201 reg .predict (X_test )
186202
187203 X_trans = reg .transform (X_test )
188- assert X_trans .shape [1 ] == 1
204+ expected_column_count_drop = 11 if passthrough else 1
205+ assert X_trans .shape [1 ] == expected_column_count_drop
206+ if passthrough :
207+ assert_allclose (X_test , X_trans [:, - 10 :])
208+
209+
@pytest.mark.parametrize('fmt', ['csc', 'csr', 'coo'])
def test_stacking_regressor_sparse_passthrough(fmt):
    """Passthrough on a sparse X must keep the output sparse and preserve
    both the original feature values and the sparse format."""
    # Build a sparse version of the scaled diabetes data in the requested
    # format, then split it.
    X_sparse = sparse.coo_matrix(scale(X_diabetes)).asformat(fmt)
    X_train, X_test, y_train, _ = train_test_split(
        X_sparse, y_diabetes, random_state=42
    )
    base_estimators = [('lr', LinearRegression()), ('svr', LinearSVR())]
    final = RandomForestRegressor(n_estimators=10, random_state=42)
    reg = StackingRegressor(
        estimators=base_estimators, final_estimator=final, cv=5,
        passthrough=True
    )
    reg.fit(X_train, y_train)
    X_trans = reg.transform(X_test)
    # With passthrough, the trailing 10 columns are the untouched input
    # features (diabetes has 10 features).
    assert_allclose_dense_sparse(X_test, X_trans[:, -10:])
    # Sparsity and the requested sparse format must survive the transform.
    assert sparse.issparse(X_trans)
    assert X_test.format == X_trans.format
227+
228+
@pytest.mark.parametrize('fmt', ['csc', 'csr', 'coo'])
def test_stacking_classifier_sparse_passthrough(fmt):
    """Passthrough on a sparse X must keep the output sparse and preserve
    both the original feature values and the sparse format."""
    # Build a sparse version of the scaled iris data in the requested
    # format, then split it.
    X_sparse = sparse.coo_matrix(scale(X_iris)).asformat(fmt)
    X_train, X_test, y_train, _ = train_test_split(
        X_sparse, y_iris, random_state=42
    )
    base_estimators = [('lr', LogisticRegression()), ('svc', LinearSVC())]
    final = RandomForestClassifier(n_estimators=10, random_state=42)
    clf = StackingClassifier(
        estimators=base_estimators, final_estimator=final, cv=5,
        passthrough=True
    )
    clf.fit(X_train, y_train)
    X_trans = clf.transform(X_test)
    # With passthrough, the trailing 4 columns are the untouched input
    # features (iris has 4 features).
    assert_allclose_dense_sparse(X_test, X_trans[:, -4:])
    # Sparsity and the requested sparse format must survive the transform.
    assert sparse.issparse(X_trans)
    assert X_test.format == X_trans.format
189246
190247
191248def test_stacking_classifier_drop_binary_prob ():
0 commit comments