
Commit ad526f2

refactor to move MNIST reshaping into one cell

1 parent d9c8ef6 commit ad526f2

File tree

2 files changed: +137 −54 lines

LeNet-Lab-Solution.ipynb

Lines changed: 67 additions & 28 deletions
@@ -31,9 +31,59 @@
 "from tensorflow.examples.tutorials.mnist import input_data\n",
 "\n",
 "mnist = input_data.read_data_sets(\"MNIST_data/\")\n",
-"X_train, y_train = mnist.train.images, mnist.train.labels\n",
-"X_validation, y_validation = mnist.train.images, mnist.train.labels\n",
-"X_test, y_test = mnist.test.images, mnist.test.labels"
+"X_train, y_train = mnist.train.images, mnist.train.labels\n",
+"X_validation, y_validation = mnist.validation.images, mnist.validation.labels\n",
+"X_test, y_test = mnist.test.images, mnist.test.labels\n",
+"\n",
+"assert(len(X_train) == len(y_train))\n",
+"assert(len(X_validation) == len(y_validation))\n",
+"assert(len(X_test) == len(y_test))\n",
+"\n",
+"print()\n",
+"print(\"Image Shape: {}\".format(X_train[0].shape))\n",
+"print()\n",
+"print(\"Training Set: {} samples\".format(len(X_train)))\n",
+"print(\"Validation Set: {} samples\".format(len(X_validation)))\n",
+"print(\"Test Set: {} samples\".format(len(X_test)))"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"The MNIST data that TensorFlow pre-loads comes as an \"unrolled\" vector of 784 pixels.\n",
+"\n",
+"However, the LeNet architecture only accepts 32x32 images.\n",
+"\n",
+"In order to reformat the MNIST data into a shape that LeNet will accept, we proceed in two steps:\n",
+"\n",
+"1. Reshape the 784 pixel vector into a 28x28 matrix (28x28 = 784).\n",
+"2. Pad the data with two rows of zeros on the top and bottom, and two columns of zeros on the left and right (28+2+2 = 32).\n",
+"\n",
+"You do not need to modify this section."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"import numpy as np\n",
+"\n",
+"# Reshape MNIST image from vector to matrix\n",
+"X_train = np.reshape(X_train, (-1, 28, 28, 1))\n",
+"X_validation = np.reshape(X_validation, (-1, 28, 28, 1))\n",
+"X_test = np.reshape(X_test, (-1, 28, 28, 1))\n",
+"\n",
+"# Pad images with 0s\n",
+"X_train = np.pad(X_train, ((0,0),(2,2),(2,2),(0,0)), 'constant')\n",
+"X_validation = np.pad(X_validation, ((0,0),(2,2),(2,2),(0,0)), 'constant')\n",
+"X_test = np.pad(X_test, ((0,0),(2,2),(2,2),(0,0)), 'constant')\n",
+" \n",
+"print(\"Updated Image Shape: {}\".format(X_train[0].shape))"
 ]
 },
 {
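
A quick way to see what the pad spec does — each tuple is (amount before, amount after) for one axis, with nothing added on the batch and channel axes — is to run it on a toy array. This snippet is illustrative and not part of the commit:

import numpy as np

# Stand-in for one 4x4 "image" with batch and channel axes, like the 28x28 MNIST data
toy = np.arange(16, dtype=np.float32).reshape(1, 4, 4, 1)

# Same pad spec as the notebook: 2 zeros on each side of height and width only
padded = np.pad(toy, ((0, 0), (2, 2), (2, 2), (0, 0)), 'constant')

print(toy.shape)     # (1, 4, 4, 1)
print(padded.shape)  # (1, 8, 8, 1) -- 4+2+2 = 8, just as 28+2+2 = 32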
@@ -61,10 +111,7 @@
 "%matplotlib inline\n",
 "\n",
 "index = random.randint(0, len(X_train))\n",
-"image = X_train[index]\n",
-"\n",
-"# Reshape MNIST image from vector to matrix\n",
-"image = np.reshape(image, (28, 28))\n",
+"image = X_train[index].squeeze()\n",
 "\n",
 "plt.figure(figsize=(1,1))\n",
 "plt.imshow(image, cmap=\"gray\")\n",
@@ -116,7 +163,7 @@
 "import tensorflow as tf\n",
 "\n",
 "EPOCHS = 10\n",
-"BATCH_SIZE = 50"
+"BATCH_SIZE = 128"
 ]
 },
 {
@@ -128,7 +175,7 @@
 "\n",
 "This is the only cell you need to edit.\n",
 "### Input\n",
-"An MNIST image is initially 784 features (1D). We reshape this to (28, 28, 1) (3D), normalize such that the values are between 0-1 instead of 0-255, and finally, pad the image with 0s, such that the height and width are 32 (centers digit further). Thus, the input shape going into the first convolutional layer is 32x32x1.\n",
+"The LeNet architecture accepts a 32x32xC image as input, where C is the number of color channels. Since MNIST images are grayscale, C is 1 in this case.\n",
 "\n",
 "### Architecture\n",
 "**Convolutional Layer 1.** The output shape should be 28x28x6.\n",
@@ -165,14 +212,7 @@
 "source": [
 "from tensorflow.contrib.layers import flatten\n",
 "\n",
-"def LeNet(x):\n",
-"    # Reshape from 2D to 4D. This prepares the data for\n",
-"    # convolutional and pooling layers.\n",
-"    x = tf.reshape(x, (-1, 28, 28, 1))\n",
-"    # Pad 0s to 32x32. Centers the digit further.\n",
-"    # Add 2 rows/columns on each side for height and width dimensions.\n",
-"    x = tf.pad(x, [[0, 0], [2, 2], [2, 2], [0, 0]], mode=\"CONSTANT\")\n",
-"    \n",
+"def LeNet(x): \n",
 "    # Hyperparameters\n",
 "    mu = 0\n",
 "    sigma = 0.1\n",
@@ -201,10 +241,9 @@
 "\n",
 "    # SOLUTION: Flatten Layer.\n",
 "    fc1 = flatten(conv2)\n",
-"    fc1_shape = (fc1.get_shape().as_list()[-1], 120)\n",
 "    \n",
-"    # SOLUTION: Fully Connected Layer 1. Input = 5x5x16. Output = 120.\n",
-"    fc1_W = tf.Variable(tf.truncated_normal(shape=(fc1_shape), mean = mu, stddev = sigma))\n",
+"    # SOLUTION: Fully Connected Layer 1. Input = 400 (5x5x16). Output = 120.\n",
+"    fc1_W = tf.Variable(tf.truncated_normal(shape=(400, 120), mean = mu, stddev = sigma))\n",
 "    fc1_b = tf.Variable(tf.zeros(120))\n",
 "    fc1 = tf.matmul(fc1, fc1_W) + fc1_b\n",
 "    \n",
@@ -226,7 +265,8 @@
 "## Features and Labels\n",
 "Train LeNet to classify [MNIST](http://yann.lecun.com/exdb/mnist/) data.\n",
 "\n",
-"Each MNIST image is a 28x28x1 grayscale representation of a digit. So the length of an \"unrolled\" MNIST image vector is **784** (28x28x1=784), and the number of classes in the label set is **10**.\n",
+"`x` is a placeholder for a batch of input images.\n",
+"`y` is a placeholder for a batch of output labels.\n",
 "\n",
 "You do not need to modify this section."
 ]
@@ -239,7 +279,7 @@
 },
 "outputs": [],
 "source": [
-"x = tf.placeholder(tf.float32, (None, 784))\n",
+"x = tf.placeholder(tf.float32, (None, 32, 32, 1))\n",
 "y = tf.placeholder(tf.int32, (None))\n",
 "one_hot_y = tf.one_hot(y, 10)"
 ]
@@ -265,19 +305,15 @@
 "logits = LeNet(x)\n",
 "loss_operation = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, one_hot_y))\n",
 "optimizer = tf.train.AdamOptimizer()\n",
-"training_operation = optimizer.minimize(loss_operation)\n",
-"correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_y, 1))\n",
-"accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))"
+"training_operation = optimizer.minimize(loss_operation)"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
 "## Model Evaluation\n",
-"Evaluate how well the model classifies a batch of data.\n",
-"\n",
-"If the number of examples in the dataset is not evenly divisible by the batch size, this implementation ignores the remainder. This is fine for large datasets.\n",
+"Evaluate the loss and accuracy of the model for a given dataset.\n",
 "\n",
 "You do not need to modify this section."
 ]
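
One caveat on the loss line: the notebook passes its arguments positionally, which matches the contrib-era TensorFlow releases this lab targets. Later TF 1.x releases expect keyword arguments for this function, i.e. (assumption about version behavior, not part of the commit):

loss_operation = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_y, logits=logits))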
@@ -290,6 +326,9 @@
 },
 "outputs": [],
 "source": [
+"correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_y, 1))\n",
+"accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n",
+"\n",
 "def evaluate(X_data, y_data):\n",
 "    num_examples = len(X_data)\n",
 "    total_accuracy, total_loss = 0, 0\n",

LeNet-Lab.ipynb

Lines changed: 70 additions & 26 deletions
@@ -31,9 +31,59 @@
 "from tensorflow.examples.tutorials.mnist import input_data\n",
 "\n",
 "mnist = input_data.read_data_sets(\"MNIST_data/\")\n",
-"X_train, y_train = mnist.train.images, mnist.train.labels\n",
-"X_validation, y_validation = mnist.train.images, mnist.train.labels\n",
-"X_test, y_test = mnist.test.images, mnist.test.labels"
+"X_train, y_train = mnist.train.images, mnist.train.labels\n",
+"X_validation, y_validation = mnist.validation.images, mnist.validation.labels\n",
+"X_test, y_test = mnist.test.images, mnist.test.labels\n",
+"\n",
+"assert(len(X_train) == len(y_train))\n",
+"assert(len(X_validation) == len(y_validation))\n",
+"assert(len(X_test) == len(y_test))\n",
+"\n",
+"print()\n",
+"print(\"Image Shape: {}\".format(X_train[0].shape))\n",
+"print()\n",
+"print(\"Training Set: {} samples\".format(len(X_train)))\n",
+"print(\"Validation Set: {} samples\".format(len(X_validation)))\n",
+"print(\"Test Set: {} samples\".format(len(X_test)))"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"The MNIST data that TensorFlow pre-loads comes as an \"unrolled\" vector of 784 pixels.\n",
+"\n",
+"However, the LeNet architecture only accepts 32x32 images.\n",
+"\n",
+"In order to reformat the MNIST data into a shape that LeNet will accept, we proceed in two steps:\n",
+"\n",
+"1. Reshape the 784 pixel vector into a 28x28 matrix (28x28 = 784).\n",
+"2. Pad the data with two rows of zeros on the top and bottom, and two columns of zeros on the left and right (28+2+2 = 32).\n",
+"\n",
+"You do not need to modify this section."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"import numpy as np\n",
+"\n",
+"# Reshape MNIST image from vector to matrix\n",
+"X_train = np.reshape(X_train, (-1, 28, 28, 1))\n",
+"X_validation = np.reshape(X_validation, (-1, 28, 28, 1))\n",
+"X_test = np.reshape(X_test, (-1, 28, 28, 1))\n",
+"\n",
+"# Pad images with 0s\n",
+"X_train = np.pad(X_train, ((0,0),(2,2),(2,2),(0,0)), 'constant')\n",
+"X_validation = np.pad(X_validation, ((0,0),(2,2),(2,2),(0,0)), 'constant')\n",
+"X_test = np.pad(X_test, ((0,0),(2,2),(2,2),(0,0)), 'constant')\n",
+" \n",
+"print(\"Updated Image Shape: {}\".format(X_train[0].shape))"
 ]
 },
 {
@@ -61,10 +111,7 @@
 "%matplotlib inline\n",
 "\n",
 "index = random.randint(0, len(X_train))\n",
-"image = X_train[index]\n",
-"\n",
-"# Reshape MNIST image from vector to matrix\n",
-"image = np.reshape(image, (28, 28))\n",
+"image = X_train[index].squeeze()\n",
 "\n",
 "plt.figure(figsize=(1,1))\n",
 "plt.imshow(image, cmap=\"gray\")\n",
@@ -116,7 +163,7 @@
 "import tensorflow as tf\n",
 "\n",
 "EPOCHS = 10\n",
-"BATCH_SIZE = 50"
+"BATCH_SIZE = 128"
 ]
 },
 {
@@ -128,7 +175,7 @@
 "\n",
 "This is the only cell you need to edit.\n",
 "### Input\n",
-"An MNIST image is initially 784 features (1D). We reshape this to (28, 28, 1) (3D), normalize such that the values are between 0-1 instead of 0-255, and finally, pad the image with 0s, such that the height and width are 32 (centers digit further). Thus, the input shape going into the first convolutional layer is 32x32x1.\n",
+"The LeNet architecture accepts a 32x32xC image as input, where C is the number of color channels. Since MNIST images are grayscale, C is 1 in this case.\n",
 "\n",
 "### Architecture\n",
 "**Convolutional Layer 1.** The output shape should be 28x28x6.\n",
@@ -157,21 +204,18 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 1,
 "metadata": {
 "collapsed": true
 },
 "outputs": [],
 "source": [
 "from tensorflow.contrib.layers import flatten\n",
 "\n",
-"def LeNet(x):\n",
-"    # Reshape from 2D to 4D. This prepares the data for\n",
-"    # convolutional and pooling layers.\n",
-"    x = tf.reshape(x, (-1, 28, 28, 1))\n",
-"    # Pad 0s to 32x32. Centers the digit further.\n",
-"    # Add 2 rows/columns on each side for height and width dimensions.\n",
-"    x = tf.pad(x, [[0, 0], [2, 2], [2, 2], [0, 0]], mode=\"CONSTANT\")\n",
+"def LeNet(x): \n",
+"    # Hyperparameters\n",
+"    mu = 0\n",
+"    sigma = 0.1\n",
 "    \n",
 "    # TODO: Convolution Layer 1. Input = 32x32x1. Output = 28x28x6.\n",
 "\n",
@@ -190,7 +234,7 @@
 "    # TODO: Fully Connected Layer 1. Input = 5x5x16. Output = 120.\n",
 "    \n",
 "    # TODO: Activation 3.\n",
-"    \n",
+"\n",
 "    # TODO: Fully Connected Layer 2. Input = 120. Output = 10.\n",
 "    \n",
 "    return logits"
@@ -203,7 +247,8 @@
 "## Features and Labels\n",
 "Train LeNet to classify [MNIST](http://yann.lecun.com/exdb/mnist/) data.\n",
 "\n",
-"Each MNIST image is a 28x28x1 grayscale representation of a digit. So the length of an \"unrolled\" MNIST image vector is **784** (28x28x1=784), and the number of classes in the label set is **10**.\n",
+"`x` is a placeholder for a batch of input images.\n",
+"`y` is a placeholder for a batch of output labels.\n",
 "\n",
 "You do not need to modify this section."
 ]
@@ -216,7 +261,7 @@
 },
 "outputs": [],
 "source": [
-"x = tf.placeholder(tf.float32, (None, 784))\n",
+"x = tf.placeholder(tf.float32, (None, 32, 32, 1))\n",
 "y = tf.placeholder(tf.int32, (None))\n",
 "one_hot_y = tf.one_hot(y, 10)"
 ]
@@ -242,19 +287,15 @@
 "logits = LeNet(x)\n",
 "loss_operation = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, one_hot_y))\n",
 "optimizer = tf.train.AdamOptimizer()\n",
-"training_operation = optimizer.minimize(loss_operation)\n",
-"correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_y, 1))\n",
-"accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))"
+"training_operation = optimizer.minimize(loss_operation)"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
 "## Model Evaluation\n",
-"Evaluate how well the model classifies a batch of data.\n",
-"\n",
-"If the number of examples in the dataset is not evenly divisible by the batch size, this implementation ignores the remainder. This is fine for large datasets.\n",
+"Evaluate the loss and accuracy of the model for a given dataset.\n",
 "\n",
 "You do not need to modify this section."
 ]
@@ -267,6 +308,9 @@
 },
 "outputs": [],
 "source": [
+"correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_y, 1))\n",
+"accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n",
+"\n",
 "def evaluate(X_data, y_data):\n",
 "    num_examples = len(X_data)\n",
 "    total_accuracy, total_loss = 0, 0\n",
