31 | 31 | "from tensorflow.examples.tutorials.mnist import input_data\n",
32 | 32 | "\n",
33 | 33 | "mnist = input_data.read_data_sets(\"MNIST_data/\")\n",
34 | | - "X_train, y_train = mnist.train.images, mnist.train.labels\n",
35 | | - "X_validation, y_validation = mnist.train.images, mnist.train.labels\n",
36 | | - "X_test, y_test = mnist.test.images, mnist.test.labels"
| 34 | + "X_train, y_train = mnist.train.images, mnist.train.labels\n",
| 35 | + "X_validation, y_validation = mnist.validation.images, mnist.validation.labels\n",
| 36 | + "X_test, y_test = mnist.test.images, mnist.test.labels\n",
| 37 | + "\n",
| 38 | + "assert(len(X_train) == len(y_train))\n",
| 39 | + "assert(len(X_validation) == len(y_validation))\n",
| 40 | + "assert(len(X_test) == len(y_test))\n",
| 41 | + "\n",
| 42 | + "print()\n",
| 43 | + "print(\"Image Shape: {}\".format(X_train[0].shape))\n",
| 44 | + "print()\n",
| 45 | + "print(\"Training Set: {} samples\".format(len(X_train)))\n",
| 46 | + "print(\"Validation Set: {} samples\".format(len(X_validation)))\n",
| 47 | + "print(\"Test Set: {} samples\".format(len(X_test)))"
| 48 | + ]
| 49 | + },
| 50 | + {
| 51 | + "cell_type": "markdown",
| 52 | + "metadata": {},
| 53 | + "source": [
| 54 | + "The MNIST data that TensorFlow pre-loads comes as an \"unrolled\" vector of 784 pixels.\n",
| 55 | + "\n",
| 56 | + "However, the LeNet architecture only accepts 32x32 images.\n",
| 57 | + "\n",
| 58 | + "In order to reformat the MNIST data into a shape that LeNet will accept, we proceed in two steps:\n",
| 59 | + "\n",
| 60 | + "1. Reshape the 784-pixel vector into a 28x28 matrix (28x28 = 784).\n",
| 61 | + "2. Pad the data with two rows of zeros on the top and bottom, and two columns of zeros on the left and right (28+2+2 = 32).\n",
| 62 | + "\n",
| 63 | + "You do not need to modify this section."
| 64 | + ]
| 65 | + },
| 66 | + {
| 67 | + "cell_type": "code",
| 68 | + "execution_count": null,
| 69 | + "metadata": {
| 70 | + "collapsed": false
| 71 | + },
| 72 | + "outputs": [],
| 73 | + "source": [
| 74 | + "import numpy as np\n",
| 75 | + "\n",
| 76 | + "# Reshape MNIST image from vector to matrix\n",
| 77 | + "X_train = np.reshape(X_train, (-1, 28, 28, 1))\n",
| 78 | + "X_validation = np.reshape(X_validation, (-1, 28, 28, 1))\n",
| 79 | + "X_test = np.reshape(X_test, (-1, 28, 28, 1))\n",
| 80 | + "\n",
| 81 | + "# Pad images with 0s\n",
| 82 | + "X_train = np.pad(X_train, ((0,0),(2,2),(2,2),(0,0)), 'constant')\n",
| 83 | + "X_validation = np.pad(X_validation, ((0,0),(2,2),(2,2),(0,0)), 'constant')\n",
| 84 | + "X_test = np.pad(X_test, ((0,0),(2,2),(2,2),(0,0)), 'constant')\n",
| 85 | + " \n",
| 86 | + "print(\"Updated Image Shape: {}\".format(X_train[0].shape))"
37 | 87 | ]
38 | 88 | },
39 | 89 | {
⋮
61 | 111 | "%matplotlib inline\n",
62 | 112 | "\n",
63 | 113 | "index = random.randint(0, len(X_train) - 1)\n",
64 | | - "image = X_train[index]\n",
65 | | - "\n",
66 | | - "# Reshape MNIST image from vector to matrix\n",
67 | | - "image = np.reshape(image, (28, 28))\n",
| 114 | + "image = X_train[index].squeeze()\n",
68 | 115 | "\n",
69 | 116 | "plt.figure(figsize=(1,1))\n",
70 | 117 | "plt.imshow(image, cmap=\"gray\")\n",
⋮
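Aside: the new `.squeeze()` call works because the reshape-and-pad cell above already gives every image the shape (32, 32, 1); `squeeze()` merely drops the singleton channel axis so `plt.imshow` receives the 2-D array it expects. A minimal shape check (illustrative only, not a line from this commit):

```python
import numpy as np

# After the padding step, each image is (32, 32, 1).
img = np.zeros((32, 32, 1), dtype=np.float32)
print(img.squeeze().shape)  # (32, 32) -- ready for plt.imshow
```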
116 | 163 | "import tensorflow as tf\n",
117 | 164 | "\n",
118 | 165 | "EPOCHS = 10\n",
119 | | - "BATCH_SIZE = 50"
| 166 | + "BATCH_SIZE = 128"
120 | 167 | ]
121 | 168 | },
122 | 169 | {
⋮
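`EPOCHS` and `BATCH_SIZE` are consumed by the training loop, which falls outside the hunks shown here. A sketch of a typical loop consistent with the names this diff defines (`training_operation`, `x`, `y`); the scikit-learn `shuffle` helper and the session setup are assumptions, not lines from this commit:

```python
from sklearn.utils import shuffle

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(EPOCHS):
        # Reshuffle every epoch so each pass sees different batches.
        X_train, y_train = shuffle(X_train, y_train)
        for offset in range(0, len(X_train), BATCH_SIZE):
            batch_x = X_train[offset:offset + BATCH_SIZE]
            batch_y = y_train[offset:offset + BATCH_SIZE]
            sess.run(training_operation, feed_dict={x: batch_x, y: batch_y})
```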
128 | 175 | "\n",
129 | 176 | "This is the only cell you need to edit.\n",
130 | 177 | "### Input\n",
131 | | - "An MNIST image is initially 784 features (1D). We reshape this to (28, 28, 1) (3D), normalize such that the values are between 0-1 instead of 0-255, and finally, pad the image with 0s, such that the height and width are 32 (centers digit further). Thus, the input shape going into the first convolutional layer is 32x32x1.\n",
| 178 | + "The LeNet architecture accepts a 32x32xC image as input, where C is the number of color channels. Since MNIST images are grayscale, C is 1 in this case.\n",
132 | 179 | "\n",
133 | 180 | "### Architecture\n",
134 | 181 | "**Convolutional Layer 1.** The output shape should be 28x28x6.\n",
⋮
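The convolutional layers themselves are elided from this hunk, but the 28x28x6 shape follows directly from a 5x5 filter applied with stride 1 and 'VALID' padding: 32 - 5 + 1 = 28 in each spatial dimension, with 6 filters setting the depth. A sketch of such a layer using the `mu`/`sigma` hyperparameters defined in the cell below (the variable names are illustrative, not necessarily the solution's):

```python
# 32x32x1 -> 28x28x6: 5x5 filters, depth 1 -> 6, stride 1, VALID padding.
conv1_W = tf.Variable(tf.truncated_normal(shape=(5, 5, 1, 6), mean=mu, stddev=sigma))
conv1_b = tf.Variable(tf.zeros(6))
conv1 = tf.nn.conv2d(x, conv1_W, strides=[1, 1, 1, 1], padding='VALID') + conv1_b
conv1 = tf.nn.relu(conv1)
```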
165 | 212 | "source": [
166 | 213 | "from tensorflow.contrib.layers import flatten\n",
167 | 214 | "\n",
168 | | - "def LeNet(x):\n",
169 | | - " # Reshape from 2D to 4D. This prepares the data for\n",
170 | | - " # convolutional and pooling layers.\n",
171 | | - " x = tf.reshape(x, (-1, 28, 28, 1))\n",
172 | | - " # Pad 0s to 32x32. Centers the digit further.\n",
173 | | - " # Add 2 rows/columns on each side for height and width dimensions.\n",
174 | | - " x = tf.pad(x, [[0, 0], [2, 2], [2, 2], [0, 0]], mode=\"CONSTANT\")\n",
175 | | - " \n",
| 215 | + "def LeNet(x): \n",
176 | 216 | " # Hyperparameters\n",
177 | 217 | " mu = 0\n",
178 | 218 | " sigma = 0.1\n",
⋮
201 | 241 | "\n",
202 | 242 | " # SOLUTION: Flatten Layer.\n",
203 | 243 | " fc1 = flatten(conv2)\n",
204 | | - " fc1_shape = (fc1.get_shape().as_list()[-1], 120)\n",
205 | 244 | " \n",
206 | | - " # SOLUTION: Fully Connected Layer 1. Input = 5x5x16. Output = 120.\n",
207 | | - " fc1_W = tf.Variable(tf.truncated_normal(shape=(fc1_shape), mean = mu, stddev = sigma))\n",
| 245 | + " # SOLUTION: Fully Connected Layer 1. Input = 400 (5x5x16). Output = 120.\n",
| 246 | + " fc1_W = tf.Variable(tf.truncated_normal(shape=(400, 120), mean = mu, stddev = sigma))\n",
208 | 247 | " fc1_b = tf.Variable(tf.zeros(120))\n",
209 | 248 | " fc1 = tf.matmul(fc1, fc1_W) + fc1_b\n",
210 | 249 | " \n",
⋮
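The replaced lines derived the flattened width from the tensor at graph-construction time; the new version hardcodes 400, which matches the 5x5x16 output of the final pooling layer but must be edited by hand if the conv stack ever changes. If that flexibility were wanted, a dynamic variant (a sketch, not this commit's code) could read:

```python
# Derive the flattened width from the tensor itself (5*5*16 = 400 here).
fc1_width = fc1.get_shape().as_list()[-1]
fc1_W = tf.Variable(tf.truncated_normal(shape=(fc1_width, 120), mean=mu, stddev=sigma))
```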
226 | 265 | "## Features and Labels\n",
227 | 266 | "Train LeNet to classify [MNIST](http://yann.lecun.com/exdb/mnist/) data.\n",
228 | 267 | "\n",
229 | | - "Each MNIST image is a 28x28x1 grayscale representation of a digit. So the length of an \"unrolled\" MNIST image vector is **784** (28x28x1=784), and the number of classes in the label set is **10**.\n",
| 268 | + "`x` is a placeholder for a batch of input images.\n",
| 269 | + "`y` is a placeholder for a batch of output labels.\n",
230 | 270 | "\n",
231 | 271 | "You do not need to modify this section."
232 | 272 | ]
⋮
239 | 279 | },
240 | 280 | "outputs": [],
241 | 281 | "source": [
242 | | - "x = tf.placeholder(tf.float32, (None, 784))\n",
| 282 | + "x = tf.placeholder(tf.float32, (None, 32, 32, 1))\n",
243 | 283 | "y = tf.placeholder(tf.int32, (None))\n",
244 | 284 | "one_hot_y = tf.one_hot(y, 10)"
245 | 285 | ]
⋮
265 | 305 | "logits = LeNet(x)\n",
266 | 306 | "loss_operation = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, one_hot_y))\n",
267 | 307 | "optimizer = tf.train.AdamOptimizer()\n",
268 | | - "training_operation = optimizer.minimize(loss_operation)\n",
269 | | - "correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_y, 1))\n",
270 | | - "accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))"
| 308 | + "training_operation = optimizer.minimize(loss_operation)"
271 | 309 | ]
272 | 310 | },
273 | 311 | {
274 | 312 | "cell_type": "markdown",
275 | 313 | "metadata": {},
276 | 314 | "source": [
277 | 315 | "## Model Evaluation\n",
278 | | - "Evaluate how well the model classifies a batch of data.\n",
279 | | - "\n",
280 | | - "If the number of examples in the dataset is not evenly divisible by the batch size, this implementation ignores the remainder. This is fine for large datasets.\n",
| 316 | + "Evaluate the loss and accuracy of the model for a given dataset.\n",
281 | 317 | "\n",
282 | 318 | "You do not need to modify this section."
283 | 319 | ]
⋮
290 | 326 | },
291 | 327 | "outputs": [],
292 | 328 | "source": [
| 329 | + "correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_y, 1))\n",
| 330 | + "accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n",
| 331 | + "\n",
293 | 332 | "def evaluate(X_data, y_data):\n",
294 | 333 | " num_examples = len(X_data)\n",
295 | 334 | " total_accuracy, total_loss = 0, 0\n",
⋮
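Only the opening lines of `evaluate` fall inside this hunk. A plausible completion, consistent with the accumulators above and the batched pattern used elsewhere in the notebook (a sketch under those assumptions; the elided body may differ in detail):

```python
def evaluate(X_data, y_data):
    num_examples = len(X_data)
    total_accuracy, total_loss = 0, 0
    sess = tf.get_default_session()
    for offset in range(0, num_examples, BATCH_SIZE):
        batch_x = X_data[offset:offset + BATCH_SIZE]
        batch_y = y_data[offset:offset + BATCH_SIZE]
        loss, accuracy = sess.run([loss_operation, accuracy_operation],
                                  feed_dict={x: batch_x, y: batch_y})
        # Weight by batch size so a final short batch is counted correctly.
        total_loss += loss * len(batch_x)
        total_accuracy += accuracy * len(batch_x)
    return total_loss / num_examples, total_accuracy / num_examples
```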