 import numpy as np
 from keras.preprocessing.image import ImageDataGenerator
 from keras.models import Sequential
+from keras.callbacks import Callback
 from keras.layers import Dropout, Flatten, Dense
-from keras import applications
+from keras.applications.vgg16 import VGG16, preprocess_input
+from dogcat_data import generators, get_nb_files
+import os
+import sys
+import wandb
+from wandb.keras import WandbCallback

-# dimensions of our images.
-img_width, img_height = 150, 150
+wandb.init()
+config = wandb.config

-top_model_weights_path = 'bottleneck_fc_model.h5'
-train_data_dir = 'dogcat-data/train'
-validation_data_dir = 'dogcat-data/validation'
-nb_train_samples = 2000
-nb_validation_samples = 2000
-epochs = 50
-batch_size = 10 # should be divisible nb_samples
+# dimensions of our images.
+config.img_width = 224
+config.img_height = 224
+config.epochs = 50
+config.batch_size = 40

+top_model_weights_path = 'bottleneck.h5'
+train_dir = 'dogcat-data/train'
+validation_dir = 'dogcat-data/validation'
+nb_train_samples = 1000
+nb_validation_samples = 1000

 def save_bottlebeck_features():
-    datagen = ImageDataGenerator(rescale=1. / 255)
-
+    if os.path.exists('bottleneck_features_train.npy') and (len(sys.argv) == 1 or sys.argv[1] != "--force"):
+        print("Using saved features, pass --force to save new features")
+        return
+    datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
+    train_generator = datagen.flow_from_directory(
+        train_dir,
+        target_size=(config.img_width, config.img_height),
+        batch_size=config.batch_size,
+        class_mode="binary")
+
+    val_generator = datagen.flow_from_directory(
+        validation_dir,
+        target_size=(config.img_width, config.img_height),
+        batch_size=config.batch_size,
+        class_mode="binary")
+
     # build the VGG16 network
-    model = applications.VGG16(include_top=False, weights='imagenet')
-
-    generator = datagen.flow_from_directory(
-        train_data_dir,
-        target_size=(img_width, img_height),
-        batch_size=batch_size,
-        class_mode=None,
-        shuffle=False)
-    bottleneck_features_train = model.predict_generator(
-        generator, nb_train_samples // batch_size)
-    print(bottleneck_features_train.shape)
-    np.save(open('bottleneck_features_train.npy', 'wb'),
-            bottleneck_features_train)
-
-    generator = datagen.flow_from_directory(
-        validation_data_dir,
-        target_size=(img_width, img_height),
-        batch_size=batch_size,
-        class_mode=None,
-        shuffle=False)
-    bottleneck_features_validation = model.predict_generator(
-        generator, nb_validation_samples // batch_size)
-    np.save(open('bottleneck_features_validation.npy', 'wb'),
-            bottleneck_features_validation)
+    model = VGG16(include_top=False, weights='imagenet')
+
+    print("Predicting bottleneck training features")
+    training_labels = []
+    training_features = []
+    for batch in range(nb_train_samples // config.batch_size):
+        data, labels = next(train_generator)
+        training_labels.append(labels)
+        training_features.append(model.predict(data))
+    training_labels = np.concatenate(training_labels)
+    training_features = np.concatenate(training_features)
+    np.savez(open('bottleneck_features_train.npy', 'wb'),
+             features=training_features, labels=training_labels)
+
+    print("Predicting bottleneck validation features")
+    validation_labels = []
+    validation_features = []
+    validation_data = []
+    for batch in range(nb_validation_samples // config.batch_size):
+        data, labels = next(val_generator)
+        validation_features.append(model.predict(data))
+        validation_labels.append(labels)
+        validation_data.append(data)
+    validation_labels = np.concatenate(validation_labels)
+    validation_features = np.concatenate(validation_features)
+    validation_data = np.concatenate(validation_data)
+    np.savez(open('bottleneck_features_validation.npy', 'wb'),
+             features=validation_features, labels=validation_labels, data=validation_data)

 def train_top_model():
-    train_data = np.load(open('bottleneck_features_train.npy', 'rb'))
-    train_labels = np.array(
-        [0] * int(nb_train_samples / 2) + [1] * int(nb_train_samples / 2))
-
-    validation_data = np.load(open('bottleneck_features_validation.npy', 'rb'))
-    validation_labels = np.array(
-        [0] * int(nb_validation_samples / 2) + [1] * int(nb_validation_samples / 2))
+    train = np.load(open('bottleneck_features_train.npy', 'rb'))
+    X_train, y_train = (train['features'], train['labels'])
+    test = np.load(open('bottleneck_features_validation.npy', 'rb'))
+    X_test, y_test, val_data = (test['features'], test['labels'], test['data'])

-    print(validation_data.shape)
-    print(validation_labels.shape)
-    #train_data.reshape(192,2048)
-    #validation_data.reshape(192,2048)
     model = Sequential()
-    model.add(Flatten(input_shape=train_data.shape[1:]))
+    model.add(Flatten(input_shape=X_train[0].shape))
     model.add(Dense(256, activation='relu'))
     model.add(Dropout(0.5))
     model.add(Dense(1, activation='sigmoid'))

     model.compile(optimizer='rmsprop',
-                  loss='binary_crossentropy', metrics=['accuracy'])
-
-    model.fit(train_data, train_labels,
-              epochs=epochs,
-              batch_size=batch_size,
-              validation_data=(validation_data, validation_labels))
+                  loss='binary_crossentropy', metrics=['binary_accuracy'])
+
+    class Images(Callback):
+        def on_epoch_end(self, epoch, logs):
+            base_model = VGG16(include_top=False, weights='imagenet')
+            indices = np.random.randint(val_data.shape[0], size=36)
+            test_data = val_data[indices]
+            features = base_model.predict(np.array([preprocess_input(data) for data in test_data]))
+            pred_data = model.predict(features)
+            wandb.log({
+                "examples": [
+                    wandb.Image(test_data[i], caption="cat" if pred_data[i] < 0.5 else "dog")
+                    for i, data in enumerate(test_data)]
+            }, commit=False)
+
+    model.fit(X_train, y_train,
+              epochs=config.epochs,
+              batch_size=config.batch_size,
+              validation_data=(X_test, y_test),
+              callbacks=[Images(), WandbCallback(save_model=False)])
     model.save_weights(top_model_weights_path)

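The hunk does not show how these two stages are invoked; presumably the script ends with an entry point that runs feature extraction and then trains the top model. A minimal sketch, assuming a plain __main__ guard and no CLI handling beyond the --force flag checked above (not part of this diff):

    if __name__ == '__main__':
        # extract and cache VGG16 bottleneck features (skipped if already saved and --force is absent)
        save_bottlebeck_features()
        # train the small dense classifier on the cached features and log to wandb
        train_top_model()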