Skip to content

Commit 2d074cc

Browse files
author
vanpelt
committed
Lots of fixes for transfer learning
1 parent 0a226ba commit 2d074cc

11 files changed

+409
-447
lines changed

keras-transfer/config-defaults.yaml

Lines changed: 0 additions & 26 deletions
This file was deleted.

keras-transfer/dogcat-bottleneck.py

Lines changed: 88 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -2,77 +2,111 @@
22
import numpy as np
33
from keras.preprocessing.image import ImageDataGenerator
44
from keras.models import Sequential
5+
from keras.callbacks import Callback
56
from keras.layers import Dropout, Flatten, Dense
6-
from keras import applications
7+
from keras.applications.vgg16 import VGG16, preprocess_input
8+
from dogcat_data import generators, get_nb_files
9+
import os
10+
import sys
11+
import wandb
12+
from wandb.keras import WandbCallback
713

8-
# dimensions of our images.
9-
img_width, img_height = 150, 150
14+
# Start a Weights & Biases run; hyperparameters are stored on its config
# object so they are recorded alongside the run.
wandb.init()
config = wandb.config

# Dimensions the input images are resized to.
config.img_width = 224
config.img_height = 224

# Training schedule for the top model.
config.epochs = 50
config.batch_size = 40

# Where the trained top-model weights are written.
top_model_weights_path = 'bottleneck.h5'

# Image directories read with flow_from_directory.
train_dir = 'dogcat-data/train'
validation_dir = 'dogcat-data/validation'

# Number of samples to push through the network for each split.
nb_train_samples = 1000
nb_validation_samples = 1000
1928

2029
def _collect_bottleneck_batches(model, generator, num_batches, keep_images=False):
    """Run `num_batches` batches from `generator` through `model`.

    Returns a `(features, labels, images)` tuple of concatenated arrays.
    `images` holds the generator output that was fed to the model and is
    `None` unless `keep_images` is true.
    """
    features, labels, images = [], [], []
    for _ in range(num_batches):
        data, batch_labels = next(generator)
        features.append(model.predict(data))
        labels.append(batch_labels)
        if keep_images:
            images.append(data)
    return (np.concatenate(features),
            np.concatenate(labels),
            np.concatenate(images) if keep_images else None)


def save_bottlebeck_features():
    """Compute VGG16 bottleneck features for both splits and cache them.

    Writes two archives with `np.savez`:

    * `bottleneck_features_train.npy` with keys `features` and `labels`
    * `bottleneck_features_validation.npy` with keys `features`, `labels`
      and `data` (the preprocessed validation images)

    If both files already exist the function returns without recomputing,
    unless `--force` is passed on the command line.
    """
    train_path = 'bottleneck_features_train.npy'
    validation_path = 'bottleneck_features_validation.npy'

    # Both archives are needed by the training step, so require both before
    # trusting the cache.
    cached = os.path.exists(train_path) and os.path.exists(validation_path)
    if cached and "--force" not in sys.argv[1:]:
        print("Using saved features, pass --force to save new features")
        return

    datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
    train_generator = datagen.flow_from_directory(
        train_dir,
        target_size=(config.img_width, config.img_height),
        batch_size=config.batch_size,
        class_mode="binary")

    val_generator = datagen.flow_from_directory(
        validation_dir,
        target_size=(config.img_width, config.img_height),
        batch_size=config.batch_size,
        class_mode="binary")

    # build the VGG16 network
    model = VGG16(include_top=False, weights='imagenet')

    print("Predicting bottleneck training features")
    # Labels are taken from the generator itself, so they stay aligned with
    # the features whatever order the generator yields batches in.
    training_features, training_labels, _ = _collect_bottleneck_batches(
        model, train_generator, nb_train_samples // config.batch_size)
    # Pass an open file object rather than a path: given a path, np.savez
    # would append ".npz" and the cache check above would never match.
    with open(train_path, 'wb') as out:
        np.savez(out, features=training_features, labels=training_labels)

    print("Predicting bottleneck validation features")
    validation_features, validation_labels, validation_data = (
        _collect_bottleneck_batches(
            model, val_generator,
            nb_validation_samples // config.batch_size,
            keep_images=True))
    with open(validation_path, 'wb') as out:
        np.savez(out,
                 features=validation_features,
                 labels=validation_labels,
                 data=validation_data)
4875

4976

5077
def train_top_model():
    """Train a small dense classifier on the cached bottleneck features.

    Loads the archives written by `save_bottlebeck_features`, fits a
    Flatten -> Dense(256) -> Dropout -> Dense(1, sigmoid) model on them,
    logs example predictions to W&B after every epoch, and saves the
    trained weights to `top_model_weights_path`.
    """
    # np.load on an .npz archive reads each array lazily on key access, so
    # pull the arrays out while the file is still open.
    with open('bottleneck_features_train.npy', 'rb') as f:
        train = np.load(f)
        X_train, y_train = train['features'], train['labels']
    with open('bottleneck_features_validation.npy', 'rb') as f:
        test = np.load(f)
        X_test, y_test, val_data = (
            test['features'], test['labels'], test['data'])

    model = Sequential()
    model.add(Flatten(input_shape=X_train.shape[1:]))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy', metrics=['binary_accuracy'])

    # Build the convolutional base once instead of re-instantiating VGG16
    # at the end of every epoch.
    base_model = VGG16(include_top=False, weights='imagenet')

    class Images(Callback):
        """Log a random sample of validation images with predicted labels."""

        def on_epoch_end(self, epoch, logs=None):
            indices = np.random.randint(val_data.shape[0], size=36)
            samples = val_data[indices]
            # The images stored by save_bottlebeck_features already went
            # through preprocess_input in the data generator; applying it
            # again here would feed the base network wrongly scaled input.
            features = base_model.predict(samples)
            scores = model.predict(features).ravel()
            # NOTE(review): images are logged in their preprocessed form;
            # confirm whether they should be converted back for display.
            wandb.log({
                "examples": [
                    wandb.Image(image,
                                caption="cat" if score < 0.5 else "dog")
                    for image, score in zip(samples, scores)]
            }, commit=False)

    model.fit(X_train, y_train,
              epochs=config.epochs,
              batch_size=config.batch_size,
              validation_data=(X_test, y_test),
              callbacks=[Images(), WandbCallback(save_model=False)])
    model.save_weights(top_model_weights_path)
77111

78112

keras-transfer/dogcat-finetune.py

Lines changed: 0 additions & 124 deletions
This file was deleted.

0 commit comments

Comments
 (0)