Skip to content

Commit 5d97a11

Browse files
committed
DOC: change the example to lighter dataset
1 parent 10d4cab commit 5d97a11

File tree

1 file changed

+28
-20
lines changed

1 file changed

+28
-20
lines changed

examples/cluster/plot_dict_face_patches.py

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,12 @@
1111
and randomly extract 50 patches from this image. Once we have accumulated
1212
500 of these patches (using 10 images), we run the `partial_fit` method
1313
of the online KMeans object, MiniBatchKMeans.
14+
15+
The verbose setting on the MiniBatchKMeans enables us to see that some
16+
clusters are reassigned during the successive calls to
17+
`partial_fit`. This is because the number of patches that they represent
18+
has become too low, and it is better to choose a random new
19+
cluster.
1420
"""
1521
print __doc__
1622

@@ -24,37 +30,38 @@
2430
from sklearn.cluster import MiniBatchKMeans
2531
from sklearn.feature_extraction.image import extract_patches_2d
2632

27-
faces = datasets.fetch_lfw_people()
28-
data = faces.data
33+
faces = datasets.fetch_olivetti_faces()
2934

3035
###############################################################################
3136
# Learn the dictionary of images
3237

3338
print 'Learning the dictionary... '
3439
rng = np.random.RandomState(0)
35-
kmeans = MiniBatchKMeans(n_clusters=81)
40+
kmeans = MiniBatchKMeans(n_clusters=81, random_state=rng, verbose=True)
3641
patch_size = (20, 20)
3742

3843
buffer = []
3944
index = 1
4045
t0 = time.time()
4146

42-
# The online learning part
43-
for index, img in enumerate(faces.images):
44-
data = extract_patches_2d(img, patch_size,
45-
max_patches=15, random_state=rng)
46-
data = np.reshape(data, (len(data), -1))
47-
buffer.append(data)
48-
index += 1
49-
if index % 50 == 0:
50-
data = np.concatenate(buffer, axis=0)
51-
data -= np.mean(data, axis=0)
52-
data /= np.std(data, axis=0)
53-
kmeans.partial_fit(data)
54-
buffer = []
55-
if index % 500 == 0:
56-
print 'Partial fit of %4i out of %i' % (index,
57-
len(faces.images))
47+
# The online learning part: cycle over the whole dataset 6 times
48+
index = 0
49+
for _ in range(6):
50+
for img in faces.images:
51+
data = extract_patches_2d(img, patch_size,
52+
max_patches=50, random_state=rng)
53+
data = np.reshape(data, (len(data), -1))
54+
buffer.append(data)
55+
index += 1
56+
if index % 10 == 0:
57+
data = np.concatenate(buffer, axis=0)
58+
data -= np.mean(data, axis=0)
59+
data /= np.std(data, axis=0)
60+
kmeans.partial_fit(data)
61+
buffer = []
62+
if index % 100 == 0:
63+
print 'Partial fit of %4i out of %i' % (index,
64+
6 * len(faces.images))
5865

5966
dt = time.time() - t0
6067
print 'done in %.2fs.' % dt
@@ -69,8 +76,9 @@
6976
pl.xticks(())
7077
pl.yticks(())
7178

79+
7280
pl.suptitle('Patches of faces\nTrain time %.1fs on %d patches' %
73-
(dt, len(faces.images)), fontsize=16)
81+
(dt, 8 * len(faces.images)), fontsize=16)
7482
pl.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)
7583

7684
pl.show()

0 commit comments

Comments
 (0)