Commit faaeba4

NicolasHug authored and jnothman committed
DOC Release highlights order (scikit-learn#15552)
1 parent 2e881f5 commit faaeba4

2 files changed: +121 -109 lines changed

doc/whats_new/v0.22.rst

Lines changed: 10 additions & 0 deletions
@@ -15,6 +15,12 @@ refer to
 
 .. include:: changelog_legend.inc
 
+Website update
+--------------
+
+`Our website <https://scikit-learn.org/>`_ was revamped and given a fresh
+new look. :pr:`14849` by `Thomas Fan`_.
+
 Clear definition of the public API
 ----------------------------------
 
@@ -51,6 +57,10 @@ been moved to ``_birch.py``).
 reference, that means it should either be private or documented. Please
 let us know by opening an issue!
 
+This work was tracked in `issue 9250
+<https://github.com/scikit-learn/scikit-learn/issues/9250>`_ and `issue
+12927 <https://github.com/scikit-learn/scikit-learn/issues/12927>`_.
+
 
 Deprecations: using ``FutureWarning`` from now on
 -------------------------------------------------
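
Editorial aside: the hunk above documents the new public/private split (e.g. ``birch.py`` becoming ``_birch.py``). As a hedged illustration of what that means for user code, not something shown in the commit itself:

# Import estimators from the public package, not the implementing module.
from sklearn.cluster import Birch   # stable, public import path

brc = Birch(n_clusters=3)

# By contrast, `from sklearn.cluster.birch import Birch` relied on a module
# that is now private (_birch.py) and should not be imported directly.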

examples/release_highlights/plot_release_highlights_0_22_0.py

Lines changed: 111 additions & 109 deletions
@@ -20,27 +20,73 @@
 """
 
 ##############################################################################
-# KNN Based Imputation
-# ------------------------------------
-# We now support imputation for completing missing values using k-Nearest
-# Neighbors.
+# New plotting API
+# ----------------
 #
-# Each sample's missing values are imputed using the mean value from
-# ``n_neighbors`` nearest neighbors found in the training set. Two samples are
-# close if the features that neither is missing are close.
-# By default, a euclidean distance metric
-# that supports missing values,
-# :func:`~metrics.nan_euclidean_distances`, is used to find the nearest
-# neighbors.
+# A new plotting API is available for creating visualizations. This new API
+# allows for quickly adjusting the visuals of a plot without involving any
+# recomputation. It is also possible to add different plots to the same
+# figure. See more examples in the :ref:`User Guide <visualizations>`.
+
+from sklearn.model_selection import train_test_split
+from sklearn.svm import SVC
+from sklearn.metrics import plot_roc_curve
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.datasets import make_classification
+import matplotlib.pyplot as plt
+
+X, y = make_classification(random_state=0)
+X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
+
+svc = SVC(random_state=42)
+svc.fit(X_train, y_train)
+rfc = RandomForestClassifier(random_state=42)
+rfc.fit(X_train, y_train)
+
+svc_disp = plot_roc_curve(svc, X_test, y_test)
+rfc_disp = plot_roc_curve(rfc, X_test, y_test, ax=svc_disp.ax_)
+rfc_disp.figure_.suptitle("ROC curve comparison")
+
+plt.show()
+
+############################################################################
+# Stacking Classifier and Regressor
+# ---------------------------------
+# :class:`~ensemble.StackingClassifier` and
+# :class:`~ensemble.StackingRegressor`
+# allow you to have a stack of estimators with a final classifier or
+# a regressor.
+# Stacked generalization consists in stacking the output of individual
+# estimators and use a classifier to compute the final prediction. Stacking
+# allows to use the strength of each individual estimator by using their output
+# as input of a final estimator.
+# Base estimators are fitted on the full ``X`` while
+# the final estimator is trained using cross-validated predictions of the
+# base estimators using ``cross_val_predict``.
 #
-# Read more in the :ref:`User Guide <knnimpute>`.
+# Read more in the :ref:`User Guide <stacking>`.
 
-import numpy as np
-from sklearn.impute import KNNImputer
+from sklearn.datasets import load_iris
+from sklearn.svm import LinearSVC
+from sklearn.linear_model import LogisticRegression
+from sklearn.preprocessing import StandardScaler
+from sklearn.pipeline import make_pipeline
+from sklearn.ensemble import StackingClassifier
+from sklearn.model_selection import train_test_split
 
-X = [[1, 2, np.nan], [3, 4, 3], [np.nan, 6, 5], [8, 8, 7]]
-imputer = KNNImputer(n_neighbors=2)
-print(imputer.fit_transform(X))
+X, y = load_iris(return_X_y=True)
+estimators = [
+    ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),
+    ('svr', make_pipeline(StandardScaler(),
+                          LinearSVC(random_state=42)))
+]
+clf = StackingClassifier(
+    estimators=estimators, final_estimator=LogisticRegression()
+)
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, stratify=y, random_state=42
+)
+clf.fit(X_train, y_train).score(X_test, y_test)
 
 ##############################################################################
 # Permutation-based feature importance
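
Editorial aside on the plotting API added in the hunk above: the key point is that the returned display object stores the computed curve, so visuals can be adjusted without re-predicting. A minimal self-contained sketch, not part of the commit, using the display's `plot` method (present in 0.22):

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import plot_roc_curve
import matplotlib.pyplot as plt

X, y = make_classification(random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
svc = SVC(random_state=42).fit(X_train, y_train)

disp = plot_roc_curve(svc, X_test, y_test)  # predictions computed once here
fig, ax = plt.subplots()
disp.plot(ax=ax)  # re-draws the stored ROC curve; nothing is recomputed
plt.show()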
@@ -50,9 +96,7 @@
 # estimate of the importance of each feature, for any fitted estimator:
 
 from sklearn.ensemble import RandomForestClassifier
-from sklearn.datasets import make_classification
 from sklearn.inspection import permutation_importance
-import matplotlib.pyplot as plt
 
 X, y = make_classification(random_state=0, n_features=5, n_informative=3)
 rf = RandomForestClassifier(random_state=0).fit(X, y)
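
Editorial aside: this hunk only removes imports made redundant by the reordering; the call the surrounding example builds up to falls outside the hunk. For context, a hedged sketch of the 0.22 API (the parameter values here are illustrative, not from the commit):

from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.inspection import permutation_importance

X, y = make_classification(random_state=0, n_features=5, n_informative=3)
rf = RandomForestClassifier(random_state=0).fit(X, y)

# Permute each feature n_repeats times and measure the drop in score.
result = permutation_importance(rf, X, y, n_repeats=10, random_state=0)
print(result.importances_mean)  # mean importance per feature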
@@ -87,32 +131,60 @@
 gbdt = HistGradientBoostingClassifier(min_samples_leaf=1).fit(X, y)
 print(gbdt.predict(X))
 
-##############################################################################
-# New plotting API
-# ----------------
-#
-# A new plotting API is available for creating visualizations. This new API
-# allows for quickly adjusting the visuals of a plot without involving any
-# recomputation. It is also possible to add different plots to the same
-# figure. See more examples in the :ref:`User Guide <visualizations>`.
+############################################################################
+# Precomputed sparse nearest neighbors graph
+# ------------------------------------------
+# Most estimators based on nearest neighbors graphs now accept precomputed
+# sparse graphs as input, to reuse the same graph for multiple estimator fits.
+# To use this feature in a pipeline, one can use the `memory` parameter, along
+# with one of the two new transformers,
+# :class:`neighbors.KNeighborsTransformer` and
+# :class:`neighbors.RadiusNeighborsTransformer`. The precomputation
+# can also be performed by custom estimators to use alternative
+# implementations, such as approximate nearest neighbors methods.
+# See more details in the :ref:`User Guide <neighbors_transformer>`.
 
-from sklearn.model_selection import train_test_split
-from sklearn.svm import SVC
-from sklearn.metrics import plot_roc_curve
+from tempfile import TemporaryDirectory
+from sklearn.neighbors import KNeighborsTransformer
+from sklearn.manifold import Isomap
+from sklearn.pipeline import make_pipeline
 
 X, y = make_classification(random_state=0)
-X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
 
-svc = SVC(random_state=42)
-svc.fit(X_train, y_train)
-rfc = RandomForestClassifier(random_state=42)
-rfc.fit(X_train, y_train)
+with TemporaryDirectory(prefix="sklearn_cache_") as tmpdir:
+    estimator = make_pipeline(
+        KNeighborsTransformer(n_neighbors=10, mode='distance'),
+        Isomap(n_neighbors=10, metric='precomputed'),
+        memory=tmpdir)
+    estimator.fit(X)
 
-svc_disp = plot_roc_curve(svc, X_test, y_test)
-rfc_disp = plot_roc_curve(rfc, X_test, y_test, ax=svc_disp.ax_)
-rfc_disp.figure_.suptitle("ROC curve comparison")
+    # We can decrease the number of neighbors and the graph will not be
+    # recomputed.
+    estimator.set_params(isomap__n_neighbors=5)
+    estimator.fit(X)
 
-plt.show()
+##############################################################################
+# KNN Based Imputation
+# ------------------------------------
+# We now support imputation for completing missing values using k-Nearest
+# Neighbors.
+#
+# Each sample's missing values are imputed using the mean value from
+# ``n_neighbors`` nearest neighbors found in the training set. Two samples are
+# close if the features that neither is missing are close.
+# By default, a euclidean distance metric
+# that supports missing values,
+# :func:`~metrics.nan_euclidean_distances`, is used to find the nearest
+# neighbors.
+#
+# Read more in the :ref:`User Guide <knnimpute>`.
+
+import numpy as np
+from sklearn.impute import KNNImputer
+
+X = [[1, 2, np.nan], [3, 4, 3], [np.nan, 6, 5], [8, 8, 7]]
+imputer = KNNImputer(n_neighbors=2)
+print(imputer.fit_transform(X))
 
 #############################################################################
 # Tree pruning
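
Editorial aside: the hunk stops at the "Tree pruning" heading, so that section's body is not shown here. As a hedged illustration of the 0.22 feature the heading refers to, minimal cost-complexity pruning is controlled by the ``ccp_alpha`` parameter (the values below are illustrative, not from the commit):

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

X, y = make_classification(random_state=0)

# ccp_alpha=0 (the default) keeps the full trees; a positive value prunes
# branches whose cost-complexity measure falls below the threshold.
rf_full = RandomForestClassifier(random_state=0, ccp_alpha=0).fit(X, y)
rf_pruned = RandomForestClassifier(random_state=0, ccp_alpha=0.05).fit(X, y)

print(rf_full.estimators_[0].tree_.node_count)    # larger tree
print(rf_pruned.estimators_[0].tree_.node_count)  # smaller, pruned tree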
@@ -143,76 +215,6 @@
 titanic = fetch_openml('titanic', version=1, as_frame=True)
 print(titanic.data.head()[['pclass', 'embarked']])
 
-############################################################################
-# Precomputed sparse nearest neighbors graph
-# ------------------------------------------
-# Most estimators based on nearest neighbors graphs now accept precomputed
-# sparse graphs as input, to reuse the same graph for multiple estimator fits.
-# To use this feature in a pipeline, one can use the `memory` parameter, along
-# with one of the two new transformers,
-# :class:`neighbors.KNeighborsTransformer` and
-# :class:`neighbors.RadiusNeighborsTransformer`. The precomputation
-# can also be performed by custom estimators to use alternative
-# implementations, such as approximate nearest neighbors methods.
-# See more details in the :ref:`User Guide <neighbors_transformer>`.
-
-from tempfile import TemporaryDirectory
-from sklearn.neighbors import KNeighborsTransformer
-from sklearn.manifold import Isomap
-from sklearn.pipeline import make_pipeline
-
-with TemporaryDirectory(prefix="sklearn_cache_") as tmpdir:
-    estimator = make_pipeline(
-        KNeighborsTransformer(n_neighbors=10, mode='distance'),
-        Isomap(n_neighbors=10, metric='precomputed'),
-        memory=tmpdir)
-    estimator.fit(X)
-
-    # We can decrease the number of neighbors and the graph will not be
-    # recomputed.
-    estimator.set_params(isomap__n_neighbors=5)
-    estimator.fit(X)
-
-############################################################################
-# Stacking Classifier and Regressor
-# ---------------------------------
-# :class:`~ensemble.StackingClassifier` and
-# :class:`~ensemble.StackingRegressor`
-# allow you to have a stack of estimators with a final classifier or
-# a regressor.
-# Stacked generalization consists in stacking the output of individual
-# estimators and use a classifier to compute the final prediction. Stacking
-# allows to use the strength of each individual estimator by using their output
-# as input of a final estimator.
-# Base estimators are fitted on the full ``X`` while
-# the final estimator is trained using cross-validated predictions of the
-# base estimators using ``cross_val_predict``.
-#
-# Read more in the :ref:`User Guide <stacking>`.
-
-from sklearn.datasets import load_iris
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.svm import LinearSVC
-from sklearn.linear_model import LogisticRegression
-from sklearn.preprocessing import StandardScaler
-from sklearn.pipeline import make_pipeline
-from sklearn.ensemble import StackingClassifier
-from sklearn.model_selection import train_test_split
-
-X, y = load_iris(return_X_y=True)
-estimators = [
-    ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),
-    ('svr', make_pipeline(StandardScaler(),
-                          LinearSVC(random_state=42)))
-]
-clf = StackingClassifier(
-    estimators=estimators, final_estimator=LogisticRegression()
-)
-X_train, X_test, y_train, y_test = train_test_split(
-    X, y, stratify=y, random_state=42
-)
-clf.fit(X_train, y_train).score(X_test, y_test)
-
 ############################################################################
 # Checking scikit-learn compatibility of an estimator
 # ---------------------------------------------------
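
Editorial aside: the diff ends inside the "Checking scikit-learn compatibility of an estimator" section, whose body is not shown. As a hedged sketch of the utility that section documents (``check_estimator`` is a real scikit-learn helper; the estimator chosen here is illustrative):

from sklearn.linear_model import LogisticRegression
from sklearn.utils.estimator_checks import check_estimator

# Runs the scikit-learn API conformance checks; raises if a check fails.
check_estimator(LogisticRegression())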
