3 files changed: 8 additions, 4 deletions
@@ -88,6 +88,7 @@
 def is_interactive():
     return not hasattr(sys.modules['__main__'], '__file__')

+
 # work-around for Jupyter notebook and IPython console
 argv = [] if is_interactive() else sys.argv[1:]
 (opts, args) = op.parse_args(argv)
@@ -136,6 +137,7 @@ def is_interactive():
 def size_mb(docs):
     return sum(len(s.encode('utf-8')) for s in docs) / 1e6

+
 data_train_size_mb = size_mb(data_train.data)
 data_test_size_mb = size_mb(data_test.data)

@@ -27,8 +27,8 @@
 Two algorithms are demoed: ordinary k-means and its more scalable cousin
 minibatch k-means.

-Additionally, latent semantic analysis can also be used to reduce dimensionality
-and discover latent patterns in the data.
+Additionally, latent semantic analysis can also be used to reduce
+dimensionality and discover latent patterns in the data.

 It can be noted that k-means (and minibatch k-means) are very sensitive to
 feature scaling and that in this case the IDF weighting helps improve the
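For context on the docstring edited above, here is a minimal sketch of the kind of pipeline it describes, assuming scikit-learn's TfidfVectorizer, TruncatedSVD, Normalizer, KMeans and MiniBatchKMeans; the toy documents and parameter values are illustrative placeholders, not taken from the example itself.

from sklearn.cluster import KMeans, MiniBatchKMeans
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import Normalizer

docs = [
    "graphics card rendering images",
    "rendering 3d graphics on a gpu",
    "space shuttle launch orbit",
    "nasa orbit mission launch",
]

# IDF weighting: the docstring notes k-means is sensitive to feature scaling.
X = TfidfVectorizer(stop_words="english").fit_transform(docs)

# Optional LSA step: TruncatedSVD reduces dimensionality; re-normalizing
# afterwards keeps k-means working with cosine-like distances.
lsa = make_pipeline(TruncatedSVD(n_components=2), Normalizer(copy=False))
X_lsa = lsa.fit_transform(X)

# Ordinary k-means and its more scalable cousin, minibatch k-means.
km = KMeans(n_clusters=2, n_init=10, random_state=0).fit(X_lsa)
mbkm = MiniBatchKMeans(n_clusters=2, n_init=10, random_state=0).fit(X_lsa)
print(km.labels_, mbkm.labels_)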
@@ -106,6 +106,7 @@
 def is_interactive():
     return not hasattr(sys.modules['__main__'], '__file__')

+
 # work-around for Jupyter notebook and IPython console
 argv = [] if is_interactive() else sys.argv[1:]
 (opts, args) = op.parse_args(argv)
@@ -138,7 +139,8 @@ def is_interactive():
 labels = dataset.target
 true_k = np.unique(labels).shape[0]

-print("Extracting features from the training dataset using a sparse vectorizer")
+print("Extracting features from the training dataset "
+      "using a sparse vectorizer")
 t0 = time()
 if opts.use_hashing:
     if opts.use_idf:
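The hunk above is truncated just as the script chooses its "sparse vectorizer". A hedged sketch of what the opts.use_hashing / opts.use_idf branches typically select between follows, assuming scikit-learn's HashingVectorizer, TfidfTransformer and TfidfVectorizer; the stand-in flags and parameter values are assumptions, not copied from the file.

from sklearn.feature_extraction.text import (HashingVectorizer,
                                              TfidfTransformer,
                                              TfidfVectorizer)
from sklearn.pipeline import make_pipeline

# Stand-ins for opts.use_hashing / opts.use_idf (assumed values).
use_hashing, use_idf = True, True

if use_hashing:
    if use_idf:
        # Hash the tokens first, then layer IDF weighting on top of the counts.
        vectorizer = make_pipeline(
            HashingVectorizer(n_features=2 ** 18, stop_words="english",
                              alternate_sign=False, norm=None),
            TfidfTransformer())
    else:
        vectorizer = HashingVectorizer(n_features=2 ** 18,
                                       stop_words="english", norm="l2")
else:
    # Plain TF-IDF with an in-memory vocabulary.
    vectorizer = TfidfVectorizer(max_df=0.5, stop_words="english",
                                 use_idf=use_idf)

X = vectorizer.fit_transform(["some training document", "another document"])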
@@ -62,7 +62,7 @@ def token_freqs(doc):
     'talk.religion.misc',
 ]
 # Uncomment the following line to use a larger set (11k+ documents)
-#categories = None
+# categories = None

 print(__doc__)
 print("Usage: %s [n_features_for_hashing]" % sys.argv[0])