Skip to content

Commit 7f11b9b

Browse files
authored
Merge pull request rasbt#1042 from rasbt/sklearn-deprecations
fix sklearn boston deprecations
2 parents 8ae4570 + 6ad29e0 commit 7f11b9b

File tree

3 files changed, with 60 additions and 69 deletions.

mlxtend/feature_selection/tests/test_column_selector.py

Lines changed: 46 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
from packaging.version import Version
1212
from sklearn import __version__ as sklearn_version
1313
from sklearn import datasets
14-
from sklearn.linear_model import LinearRegression, LogisticRegression
14+
from sklearn.linear_model import LogisticRegression
1515
from sklearn.model_selection import GridSearchCV
1616
from sklearn.pipeline import make_pipeline
1717

@@ -64,68 +64,69 @@ def test_ColumnSelector_in_gridsearch():
6464

6565

6666
def test_ColumnSelector_with_dataframe():
67-
boston = datasets.load_boston()
68-
df_in = pd.DataFrame(boston.data, columns=boston.feature_names)
69-
df_out = ColumnSelector(cols=("ZN", "CRIM")).transform(df_in)
70-
assert df_out.shape == (506, 2)
67+
iris = datasets.load_iris()
68+
df_in = pd.DataFrame(iris.data, columns=iris.feature_names)
69+
df_out = ColumnSelector(cols=("sepal length (cm)", "sepal width (cm)")).transform(
70+
df_in
71+
)
72+
assert df_out.shape == (150, 2)
7173

7274

7375
def test_ColumnSelector_with_dataframe_and_int_columns():
74-
boston = datasets.load_boston()
75-
df_in = pd.DataFrame(boston.data, columns=boston.feature_names)
76-
df_out_str = ColumnSelector(cols=("INDUS", "CHAS")).transform(df_in)
76+
iris = datasets.load_iris()
77+
df_in = pd.DataFrame(iris.data, columns=iris.feature_names)
78+
df_out_str = ColumnSelector(
79+
cols=("petal length (cm)", "petal width (cm)")
80+
).transform(df_in)
7781
df_out_int = ColumnSelector(cols=(2, 3)).transform(df_in)
7882

7983
np.testing.assert_array_equal(df_out_str[:, 0], df_out_int[:, 0])
8084
np.testing.assert_array_equal(df_out_str[:, 1], df_out_int[:, 1])
8185

8286

8387
def test_ColumnSelector_with_dataframe_drop_axis():
84-
boston = datasets.load_boston()
85-
df_in = pd.DataFrame(boston.data, columns=boston.feature_names)
86-
X1_out = ColumnSelector(cols="ZN", drop_axis=True).transform(df_in)
87-
assert X1_out.shape == (506,)
88+
iris = datasets.load_iris()
89+
df_in = pd.DataFrame(iris.data, columns=iris.feature_names)
90+
X1_out = ColumnSelector(cols=("petal length (cm)",), drop_axis=True).transform(
91+
df_in
92+
)
93+
assert X1_out.shape == (150,)
8894

89-
X1_out = ColumnSelector(cols=("ZN",), drop_axis=True).transform(df_in)
90-
assert X1_out.shape == (506,)
95+
X1_out = ColumnSelector(cols=("petal length (cm)",), drop_axis=True).transform(
96+
df_in
97+
)
98+
assert X1_out.shape == (150,)
9199

92-
X1_out = ColumnSelector(cols="ZN").transform(df_in)
93-
assert X1_out.shape == (506, 1)
100+
X1_out = ColumnSelector(cols="petal length (cm)").transform(df_in)
101+
assert X1_out.shape == (150, 1)
94102

95-
X1_out = ColumnSelector(cols=("ZN",)).transform(df_in)
96-
assert X1_out.shape == (506, 1)
103+
X1_out = ColumnSelector(cols=("petal length (cm)",)).transform(df_in)
104+
assert X1_out.shape == (150, 1)
97105

98106

99107
def test_ColumnSelector_with_dataframe_in_gridsearch():
100-
boston = datasets.load_boston()
101-
X = pd.DataFrame(boston.data, columns=boston.feature_names)
102-
y = boston.target
103-
pipe = make_pipeline(ColumnSelector(), LinearRegression())
108+
iris = datasets.load_iris()
109+
X = pd.DataFrame(iris.data, columns=iris.feature_names)
110+
y = iris.target
111+
pipe = make_pipeline(ColumnSelector(), LogisticRegression())
104112
grid = {
105-
"columnselector__cols": [["ZN", "RM"], ["ZN", "RM", "AGE"], "ZN", ["RM"]],
106-
"linearregression__copy_X": [True, False],
107-
"linearregression__fit_intercept": [True, False],
113+
"columnselector__cols": [
114+
["petal length (cm)", "petal width (cm)"],
115+
["sepal length (cm)", "sepal width (cm)", "petal width (cm)"],
116+
],
108117
}
109118

110-
if Version(sklearn_version) < Version("0.24.1"):
111-
gsearch1 = GridSearchCV(
112-
estimator=pipe,
113-
param_grid=grid,
114-
cv=5,
115-
n_jobs=1,
116-
iid=False,
117-
scoring="neg_mean_squared_error",
118-
refit=False,
119-
)
120-
else:
121-
gsearch1 = GridSearchCV(
122-
estimator=pipe,
123-
param_grid=grid,
124-
cv=5,
125-
n_jobs=1,
126-
scoring="neg_mean_squared_error",
127-
refit=False,
128-
)
119+
gsearch1 = GridSearchCV(
120+
estimator=pipe,
121+
param_grid=grid,
122+
cv=5,
123+
n_jobs=1,
124+
scoring="accuracy",
125+
refit=False,
126+
)
129127

130128
gsearch1.fit(X, y)
131-
assert gsearch1.best_params_["columnselector__cols"] == ["ZN", "RM", "AGE"]
129+
assert gsearch1.best_params_["columnselector__cols"] == [
130+
"petal length (cm)",
131+
"petal width (cm)",
132+
]

mlxtend/feature_selection/tests/test_sequential_feature_selector.py

Lines changed: 9 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
from numpy.testing import assert_almost_equal
1010
from packaging.version import Version
1111
from sklearn import __version__ as sklearn_version
12-
from sklearn.datasets import load_boston, load_iris
12+
from sklearn.datasets import load_iris
1313
from sklearn.decomposition import PCA
1414
from sklearn.ensemble import RandomForestClassifier
1515
from sklearn.linear_model import LinearRegression
@@ -19,6 +19,7 @@
1919
from sklearn.pipeline import Pipeline
2020

2121
from mlxtend.classifier import SoftmaxRegression
22+
from mlxtend.data import boston_housing_data
2223
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
2324
from mlxtend.utils import assert_raises
2425

@@ -421,8 +422,7 @@ def test_knn_scoring_metric():
421422

422423

423424
def test_regression():
424-
boston = load_boston()
425-
X, y = boston.data, boston.target
425+
X, y = boston_housing_data()
426426
lr = LinearRegression()
427427
sfs_r = SFS(
428428
lr,
@@ -443,8 +443,7 @@ def test_regression():
443443

444444

445445
def test_regression_sffs():
446-
boston = load_boston()
447-
X, y = boston.data, boston.target
446+
X, y = boston_housing_data()
448447
lr = LinearRegression()
449448
sfs_r = SFS(
450449
lr,
@@ -460,8 +459,7 @@ def test_regression_sffs():
460459

461460

462461
def test_regression_sbfs():
463-
boston = load_boston()
464-
X, y = boston.data, boston.target
462+
X, y = boston_housing_data()
465463
lr = LinearRegression()
466464
sfs_r = SFS(
467465
lr,
@@ -477,8 +475,7 @@ def test_regression_sbfs():
477475

478476

479477
def test_regression_in_range():
480-
boston = load_boston()
481-
X, y = boston.data, boston.target
478+
X, y = boston_housing_data()
482479
lr = LinearRegression()
483480
sfs_r = SFS(
484481
lr,
@@ -722,9 +719,7 @@ def test_string_scoring_clf():
722719

723720

724721
def test_max_feature_subset_size_in_tuple_range():
725-
boston = load_boston()
726-
X, y = boston.data, boston.target
727-
722+
X, y = boston_housing_data()
728723
lr = LinearRegression()
729724

730725
sfs = SFS(
@@ -741,8 +736,7 @@ def test_max_feature_subset_size_in_tuple_range():
741736

742737

743738
def test_max_feature_subset_best():
744-
boston = load_boston()
745-
X, y = boston.data, boston.target
739+
X, y = boston_housing_data()
746740
lr = LinearRegression()
747741

748742
sfs = SFS(lr, k_features="best", forward=True, floating=False, cv=10)
@@ -752,8 +746,7 @@ def test_max_feature_subset_best():
752746

753747

754748
def test_max_feature_subset_parsimonious():
755-
boston = load_boston()
756-
X, y = boston.data, boston.target
749+
X, y = boston_housing_data()
757750
lr = LinearRegression()
758751

759752
sfs = SFS(lr, k_features="parsimonious", forward=True, floating=False, cv=10)

mlxtend/feature_selection/tests/test_sequential_feature_selector_feature_groups.py

Lines changed: 5 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -4,14 +4,14 @@
44
#
55
# License: BSD 3 clause
66
import numpy as np
7-
from numpy import nan
87
from numpy.testing import assert_almost_equal
9-
from sklearn.datasets import load_boston, load_iris
8+
from sklearn.datasets import load_iris
109
from sklearn.ensemble import RandomForestClassifier
1110
from sklearn.linear_model import LinearRegression
1211
from sklearn.metrics import roc_auc_score
1312
from sklearn.neighbors import KNeighborsClassifier
1413

14+
from mlxtend.data import boston_housing_data
1515
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
1616
from mlxtend.utils import assert_raises
1717

@@ -99,8 +99,7 @@ def test_knn_wo_cv_feature_groups_default():
9999

100100

101101
def test_regression_sbfs():
102-
boston = load_boston()
103-
X, y = boston.data, boston.target
102+
X, y = boston_housing_data()
104103
lr = LinearRegression()
105104
sfs_r = SFS(
106105
lr,
@@ -163,8 +162,7 @@ def test_keyboard_interrupt():
163162

164163

165164
def test_max_feature_subset_best():
166-
boston = load_boston()
167-
X, y = boston.data, boston.target
165+
X, y = boston_housing_data()
168166
lr = LinearRegression()
169167

170168
sfs = SFS(
@@ -189,8 +187,7 @@ def test_max_feature_subset_best():
189187

190188

191189
def test_max_feature_subset_parsimonious():
192-
boston = load_boston()
193-
X, y = boston.data, boston.target
190+
X, y = boston_housing_data()
194191
lr = LinearRegression()
195192

196193
sfs = SFS(

0 commit comments

Comments
 (0)