|
from sklearn import datasets
from sklearn.decomposition import PCA
-from sklearn.linear_model import SGDClassifier
+from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV


# Define a pipeline to search for the best combination of PCA truncation
# and classifier regularization.
-logistic = SGDClassifier(loss='log', penalty='l2', early_stopping=True,
-                         max_iter=10000, tol=1e-5, random_state=0)
pca = PCA()
+# set the tolerance to a large value to make the example faster
+logistic = LogisticRegression(max_iter=10000, tol=0.1)
pipe = Pipeline(steps=[('pca', pca), ('logistic', logistic)])

X_digits, y_digits = datasets.load_digits(return_X_y=True)

# Parameters of pipelines can be set using '__' separated parameter names:
param_grid = {
    'pca__n_components': [5, 20, 30, 40, 50, 64],
-    'logistic__alpha': np.logspace(-4, 4, 5),
+    'logistic__C': np.logspace(-4, 4, 5),
}
-search = GridSearchCV(pipe, param_grid)
+search = GridSearchCV(pipe, param_grid, n_jobs=-1)
search.fit(X_digits, y_digits)
print("Best parameter (CV score=%0.3f):" % search.best_score_)
print(search.best_params_)
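Note on the renamed grid key: LogisticRegression takes its regularization strength as C (an inverse strength), while the removed SGDClassifier took alpha (a penalty multiplier), so the same np.logspace grid now sweeps C. A minimal sketch, not part of this commit, of how the '__' names in param_grid route values to the pipeline steps:

from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

pipe = Pipeline(steps=[('pca', PCA()), ('logistic', LogisticRegression())])
# '<step>__<param>' addresses a parameter of a named step; GridSearchCV
# applies the same mechanism for every candidate in param_grid.
pipe.set_params(pca__n_components=30, logistic__C=1.0)
print(pipe.get_params()['logistic__C'])  # -> 1.0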
|