|
11 | 11 | and randomly extract 50 patches from this image. Once we have accumulated |
12 | 12 | 500 of these patches (using 10 images), we run the `partial_fit` method |
13 | 13 | of the online KMeans object, MiniBatchKMeans. |
| 14 | +
|
| 15 | +The verbose setting on the MiniBatchKMeans enables us to see that some |
| 16 | +clusters are reassigned during the successive calls to |
| 17 | +`partial_fit`. This is because the number of patches that they represent |
| 18 | +has become too low, and it is better to choose a random new |
| 19 | +cluster. |
14 | 20 | """ |
15 | 21 | print __doc__ |
16 | 22 |
|
|
24 | 30 | from sklearn.cluster import MiniBatchKMeans |
25 | 31 | from sklearn.feature_extraction.image import extract_patches_2d |
26 | 32 |
|
27 | | -faces = datasets.fetch_lfw_people() |
28 | | -data = faces.data |
| 33 | +faces = datasets.fetch_olivetti_faces() |
29 | 34 |
|
30 | 35 | ############################################################################### |
31 | 36 | # Learn the dictionary of images |
32 | 37 |
|
33 | 38 | print 'Learning the dictionary... ' |
34 | 39 | rng = np.random.RandomState(0) |
35 | | -kmeans = MiniBatchKMeans(n_clusters=81) |
| 40 | +kmeans = MiniBatchKMeans(n_clusters=81, random_state=rng, verbose=True) |
36 | 41 | patch_size = (20, 20) |
37 | 42 |
|
38 | 43 | buffer = [] |
39 | 44 | index = 1 |
40 | 45 | t0 = time.time() |
41 | 46 |
|
42 | | -# The online learning part |
43 | | -for index, img in enumerate(faces.images): |
44 | | - data = extract_patches_2d(img, patch_size, |
45 | | - max_patches=15, random_state=rng) |
46 | | - data = np.reshape(data, (len(data), -1)) |
47 | | - buffer.append(data) |
48 | | - index += 1 |
49 | | - if index % 50 == 0: |
50 | | - data = np.concatenate(buffer, axis=0) |
51 | | - data -= np.mean(data, axis=0) |
52 | | - data /= np.std(data, axis=0) |
53 | | - kmeans.partial_fit(data) |
54 | | - buffer = [] |
55 | | - if index % 500 == 0: |
56 | | - print 'Partial fit of %4i out of %i' % (index, |
57 | | - len(faces.images)) |
| 47 | +# The online learning part: cycle over the whole dataset 6 times |
| 48 | +index = 0 |
| 49 | +for _ in range(6): |
| 50 | + for img in faces.images: |
| 51 | + data = extract_patches_2d(img, patch_size, |
| 52 | + max_patches=50, random_state=rng) |
| 53 | + data = np.reshape(data, (len(data), -1)) |
| 54 | + buffer.append(data) |
| 55 | + index += 1 |
| 56 | + if index % 10 == 0: |
| 57 | + data = np.concatenate(buffer, axis=0) |
| 58 | + data -= np.mean(data, axis=0) |
| 59 | + data /= np.std(data, axis=0) |
| 60 | + kmeans.partial_fit(data) |
| 61 | + buffer = [] |
| 62 | + if index % 100 == 0: |
| 63 | + print 'Partial fit of %4i out of %i' % (index, |
| 64 | + 6 * len(faces.images)) |
58 | 65 |
|
59 | 66 | dt = time.time() - t0 |
60 | 67 | print 'done in %.2fs.' % dt |
|
69 | 76 | pl.xticks(()) |
70 | 77 | pl.yticks(()) |
71 | 78 |
|
| 79 | + |
72 | 80 | pl.suptitle('Patches of faces\nTrain time %.1fs on %d patches' % |
73 | | - (dt, len(faces.images)), fontsize=16) |
| 81 | + (dt, 8 * len(faces.images)), fontsize=16) |
74 | 82 | pl.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23) |
75 | 83 |
|
76 | 84 | pl.show() |
0 commit comments