
Commit ad526f2

refactor to move MNIST reshaping into one cell

1 parent d9c8ef6 commit ad526f2

File tree

2 files changed: +137 −54 lines

LeNet-Lab-Solution.ipynb

Lines changed: 67 additions & 28 deletions
@@ -31,9 +31,59 @@
 "from tensorflow.examples.tutorials.mnist import input_data\n",
 "\n",
 "mnist = input_data.read_data_sets(\"MNIST_data/\")\n",
-"X_train, y_train = mnist.train.images, mnist.train.labels\n",
-"X_validation, y_validation = mnist.train.images, mnist.train.labels\n",
-"X_test, y_test = mnist.test.images, mnist.test.labels"
+"X_train, y_train = mnist.train.images, mnist.train.labels\n",
+"X_validation, y_validation = mnist.validation.images, mnist.validation.labels\n",
+"X_test, y_test = mnist.test.images, mnist.test.labels\n",
+"\n",
+"assert(len(X_train) == len(y_train))\n",
+"assert(len(X_validation) == len(y_validation))\n",
+"assert(len(X_test) == len(y_test))\n",
+"\n",
+"print()\n",
+"print(\"Image Shape: {}\".format(X_train[0].shape))\n",
+"print()\n",
+"print(\"Training Set: {} samples\".format(len(X_train)))\n",
+"print(\"Validation Set: {} samples\".format(len(X_validation)))\n",
+"print(\"Test Set: {} samples\".format(len(X_test)))"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"The MNIST data that TensorFlow pre-loads comes as an \"unrolled\" vector of 784 pixels.\n",
+"\n",
+"However, the LeNet architecture only accepts 32x32 images.\n",
+"\n",
+"In order to reformat the MNIST data into a shape that LeNet will accept, we proceed in two steps:\n",
+"\n",
+"1. Reshape the 784 pixel vector into a 28x28 matrix (28x28 = 784).\n",
+"2. Pad the data with two rows of zeros on the top and bottom, and two columns of zeros on the left and right (28+2+2 = 32).\n",
+"\n",
+"You do not need to modify this section."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"import numpy as np\n",
+"\n",
+"# Reshape MNIST image from vector to matrix\n",
+"X_train = np.reshape(X_train, (-1, 28, 28, 1))\n",
+"X_validation = np.reshape(X_validation, (-1, 28, 28, 1))\n",
+"X_test = np.reshape(X_test, (-1, 28, 28, 1))\n",
+"\n",
+"# Pad images with 0s\n",
+"X_train = np.pad(X_train, ((0,0),(2,2),(2,2),(0,0)), 'constant')\n",
+"X_validation = np.pad(X_validation, ((0,0),(2,2),(2,2),(0,0)), 'constant')\n",
+"X_test = np.pad(X_test, ((0,0),(2,2),(2,2),(0,0)), 'constant')\n",
+" \n",
+"print(\"Updated Image Shape: {}\".format(X_train[0].shape))"
 ]
 },
 {
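
A quick way to see what the pad spec does — each tuple is (amount before, amount after) for one axis, with nothing added on the batch and channel axes — is to run it on a toy array. This snippet is illustrative and not part of the commit:

import numpy as np

# Stand-in for one 4x4 "image" with batch and channel axes, like the 28x28 MNIST data
toy = np.arange(16, dtype=np.float32).reshape(1, 4, 4, 1)

# Same pad spec as the notebook: 2 zeros on each side of height and width only
padded = np.pad(toy, ((0, 0), (2, 2), (2, 2), (0, 0)), 'constant')

print(toy.shape)     # (1, 4, 4, 1)
print(padded.shape)  # (1, 8, 8, 1) -- 4+2+2 = 8, just as 28+2+2 = 32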
@@ -61,10 +111,7 @@
 "%matplotlib inline\n",
 "\n",
 "index = random.randint(0, len(X_train))\n",
-"image = X_train[index]\n",
-"\n",
-"# Reshape MNIST image from vector to matrix\n",
-"image = np.reshape(image, (28, 28))\n",
+"image = X_train[index].squeeze()\n",
 "\n",
 "plt.figure(figsize=(1,1))\n",
 "plt.imshow(image, cmap=\"gray\")\n",
@@ -116,7 +163,7 @@
 "import tensorflow as tf\n",
 "\n",
 "EPOCHS = 10\n",
-"BATCH_SIZE = 50"
+"BATCH_SIZE = 128"
 ]
 },
 {
@@ -128,7 +175,7 @@
 "\n",
 "This is the only cell you need to edit.\n",
 "### Input\n",
-"An MNIST image is initially 784 features (1D). We reshape this to (28, 28, 1) (3D), normalize such that the values are between 0-1 instead of 0-255, and finally, pad the image with 0s, such that the height and width are 32 (centers digit further). Thus, the input shape going into the first convolutional layer is 32x32x1.\n",
+"The LeNet architecture accepts a 32x32xC image as input, where C is the number of color channels. Since MNIST images are grayscale, C is 1 in this case.\n",
 "\n",
 "### Architecture\n",
 "**Convolutional Layer 1.** The output shape should be 28x28x6.\n",
@@ -165,14 +212,7 @@
 "source": [
 "from tensorflow.contrib.layers import flatten\n",
 "\n",
-"def LeNet(x):\n",
-"    # Reshape from 2D to 4D. This prepares the data for\n",
-"    # convolutional and pooling layers.\n",
-"    x = tf.reshape(x, (-1, 28, 28, 1))\n",
-"    # Pad 0s to 32x32. Centers the digit further.\n",
-"    # Add 2 rows/columns on each side for height and width dimensions.\n",
-"    x = tf.pad(x, [[0, 0], [2, 2], [2, 2], [0, 0]], mode=\"CONSTANT\")\n",
-"    \n",
+"def LeNet(x): \n",
 "    # Hyperparameters\n",
 "    mu = 0\n",
 "    sigma = 0.1\n",
@@ -201,10 +241,9 @@
 "\n",
 "    # SOLUTION: Flatten Layer.\n",
 "    fc1 = flatten(conv2)\n",
-"    fc1_shape = (fc1.get_shape().as_list()[-1], 120)\n",
 "    \n",
-"    # SOLUTION: Fully Connected Layer 1. Input = 5x5x16. Output = 120.\n",
-"    fc1_W = tf.Variable(tf.truncated_normal(shape=(fc1_shape), mean = mu, stddev = sigma))\n",
+"    # SOLUTION: Fully Connected Layer 1. Input = 400 (5x5x16). Output = 120.\n",
+"    fc1_W = tf.Variable(tf.truncated_normal(shape=(400, 120), mean = mu, stddev = sigma))\n",
 "    fc1_b = tf.Variable(tf.zeros(120))\n",
 "    fc1 = tf.matmul(fc1, fc1_W) + fc1_b\n",
 "    \n",
@@ -226,7 +265,8 @@
 "## Features and Labels\n",
 "Train LeNet to classify [MNIST](http://yann.lecun.com/exdb/mnist/) data.\n",
 "\n",
-"Each MNIST image is a 28x28x1 grayscale representation of a digit. So the length of an \"unrolled\" MNIST image vector is **784** (28x28x1=784), and the number of classes in the label set is **10**.\n",
+"`x` is a placeholder for a batch of input images.\n",
+"`y` is a placeholder for a batch of output labels.\n",
 "\n",
 "You do not need to modify this section."
 ]
@@ -239,7 +279,7 @@
 },
 "outputs": [],
 "source": [
-"x = tf.placeholder(tf.float32, (None, 784))\n",
+"x = tf.placeholder(tf.float32, (None, 32, 32, 1))\n",
 "y = tf.placeholder(tf.int32, (None))\n",
 "one_hot_y = tf.one_hot(y, 10)"
 ]
@@ -265,19 +305,15 @@
 "logits = LeNet(x)\n",
 "loss_operation = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, one_hot_y))\n",
 "optimizer = tf.train.AdamOptimizer()\n",
-"training_operation = optimizer.minimize(loss_operation)\n",
-"correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_y, 1))\n",
-"accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))"
+"training_operation = optimizer.minimize(loss_operation)"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
 "## Model Evaluation\n",
-"Evaluate how well the model classifies a batch of data.\n",
-"\n",
-"If the number of examples in the dataset is not evenly divisible by the batch size, this implementation ignores the remainder. This is fine for large datasets.\n",
+"Evaluate the loss and accuracy of the model for a given dataset.\n",
 "\n",
 "You do not need to modify this section."
 ]
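
One caveat on the loss line: the notebook passes its arguments positionally, which matches the contrib-era TensorFlow releases this lab targets. Later TF 1.x releases expect keyword arguments for this function, i.e. (assumption about version behavior, not part of the commit):

loss_operation = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_y, logits=logits))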
@@ -290,6 +326,9 @@
 },
 "outputs": [],
 "source": [
+"correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_y, 1))\n",
+"accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n",
+"\n",
 "def evaluate(X_data, y_data):\n",
 "    num_examples = len(X_data)\n",
 "    total_accuracy, total_loss = 0, 0\n",

LeNet-Lab.ipynb

Lines changed: 70 additions & 26 deletions
@@ -31,9 +31,59 @@
 "from tensorflow.examples.tutorials.mnist import input_data\n",
 "\n",
 "mnist = input_data.read_data_sets(\"MNIST_data/\")\n",
-"X_train, y_train = mnist.train.images, mnist.train.labels\n",
-"X_validation, y_validation = mnist.train.images, mnist.train.labels\n",
-"X_test, y_test = mnist.test.images, mnist.test.labels"
+"X_train, y_train = mnist.train.images, mnist.train.labels\n",
+"X_validation, y_validation = mnist.validation.images, mnist.validation.labels\n",
+"X_test, y_test = mnist.test.images, mnist.test.labels\n",
+"\n",
+"assert(len(X_train) == len(y_train))\n",
+"assert(len(X_validation) == len(y_validation))\n",
+"assert(len(X_test) == len(y_test))\n",
+"\n",
+"print()\n",
+"print(\"Image Shape: {}\".format(X_train[0].shape))\n",
+"print()\n",
+"print(\"Training Set: {} samples\".format(len(X_train)))\n",
+"print(\"Validation Set: {} samples\".format(len(X_validation)))\n",
+"print(\"Test Set: {} samples\".format(len(X_test)))"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"The MNIST data that TensorFlow pre-loads comes as an \"unrolled\" vector of 784 pixels.\n",
+"\n",
+"However, the LeNet architecture only accepts 32x32 images.\n",
+"\n",
+"In order to reformat the MNIST data into a shape that LeNet will accept, we proceed in two steps:\n",
+"\n",
+"1. Reshape the 784 pixel vector into a 28x28 matrix (28x28 = 784).\n",
+"2. Pad the data with two rows of zeros on the top and bottom, and two columns of zeros on the left and right (28+2+2 = 32).\n",
+"\n",
+"You do not need to modify this section."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"import numpy as np\n",
+"\n",
+"# Reshape MNIST image from vector to matrix\n",
+"X_train = np.reshape(X_train, (-1, 28, 28, 1))\n",
+"X_validation = np.reshape(X_validation, (-1, 28, 28, 1))\n",
+"X_test = np.reshape(X_test, (-1, 28, 28, 1))\n",
+"\n",
+"# Pad images with 0s\n",
+"X_train = np.pad(X_train, ((0,0),(2,2),(2,2),(0,0)), 'constant')\n",
+"X_validation = np.pad(X_validation, ((0,0),(2,2),(2,2),(0,0)), 'constant')\n",
+"X_test = np.pad(X_test, ((0,0),(2,2),(2,2),(0,0)), 'constant')\n",
+" \n",
+"print(\"Updated Image Shape: {}\".format(X_train[0].shape))"
 ]
 },
 {
@@ -61,10 +111,7 @@
 "%matplotlib inline\n",
 "\n",
 "index = random.randint(0, len(X_train))\n",
-"image = X_train[index]\n",
-"\n",
-"# Reshape MNIST image from vector to matrix\n",
-"image = np.reshape(image, (28, 28))\n",
+"image = X_train[index].squeeze()\n",
 "\n",
 "plt.figure(figsize=(1,1))\n",
 "plt.imshow(image, cmap=\"gray\")\n",
@@ -116,7 +163,7 @@
 "import tensorflow as tf\n",
 "\n",
 "EPOCHS = 10\n",
-"BATCH_SIZE = 50"
+"BATCH_SIZE = 128"
 ]
 },
 {
@@ -128,7 +175,7 @@
 "\n",
 "This is the only cell you need to edit.\n",
 "### Input\n",
-"An MNIST image is initially 784 features (1D). We reshape this to (28, 28, 1) (3D), normalize such that the values are between 0-1 instead of 0-255, and finally, pad the image with 0s, such that the height and width are 32 (centers digit further). Thus, the input shape going into the first convolutional layer is 32x32x1.\n",
+"The LeNet architecture accepts a 32x32xC image as input, where C is the number of color channels. Since MNIST images are grayscale, C is 1 in this case.\n",
 "\n",
 "### Architecture\n",
 "**Convolutional Layer 1.** The output shape should be 28x28x6.\n",
@@ -157,21 +204,18 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 1,
 "metadata": {
 "collapsed": true
 },
 "outputs": [],
 "source": [
 "from tensorflow.contrib.layers import flatten\n",
 "\n",
-"def LeNet(x):\n",
-"    # Reshape from 2D to 4D. This prepares the data for\n",
-"    # convolutional and pooling layers.\n",
-"    x = tf.reshape(x, (-1, 28, 28, 1))\n",
-"    # Pad 0s to 32x32. Centers the digit further.\n",
-"    # Add 2 rows/columns on each side for height and width dimensions.\n",
-"    x = tf.pad(x, [[0, 0], [2, 2], [2, 2], [0, 0]], mode=\"CONSTANT\")\n",
+"def LeNet(x): \n",
+"    # Hyperparameters\n",
+"    mu = 0\n",
+"    sigma = 0.1\n",
 "    \n",
 "    # TODO: Convolution Layer 1. Input = 32x32x1. Output = 28x28x6.\n",
 "\n",
@@ -190,7 +234,7 @@
 "    # TODO: Fully Connected Layer 1. Input = 5x5x16. Output = 120.\n",
 "    \n",
 "    # TODO: Activation 3.\n",
-"    \n",
+"\n",
 "    # TODO: Fully Connected Layer 2. Input = 120. Output = 10.\n",
 "    \n",
 "    return logits"
@@ -203,7 +247,8 @@
 "## Features and Labels\n",
 "Train LeNet to classify [MNIST](http://yann.lecun.com/exdb/mnist/) data.\n",
 "\n",
-"Each MNIST image is a 28x28x1 grayscale representation of a digit. So the length of an \"unrolled\" MNIST image vector is **784** (28x28x1=784), and the number of classes in the label set is **10**.\n",
+"`x` is a placeholder for a batch of input images.\n",
+"`y` is a placeholder for a batch of output labels.\n",
 "\n",
 "You do not need to modify this section."
 ]
@@ -216,7 +261,7 @@
 },
 "outputs": [],
 "source": [
-"x = tf.placeholder(tf.float32, (None, 784))\n",
+"x = tf.placeholder(tf.float32, (None, 32, 32, 1))\n",
 "y = tf.placeholder(tf.int32, (None))\n",
 "one_hot_y = tf.one_hot(y, 10)"
 ]
@@ -242,19 +287,15 @@
 "logits = LeNet(x)\n",
 "loss_operation = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, one_hot_y))\n",
 "optimizer = tf.train.AdamOptimizer()\n",
-"training_operation = optimizer.minimize(loss_operation)\n",
-"correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_y, 1))\n",
-"accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))"
+"training_operation = optimizer.minimize(loss_operation)"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
 "## Model Evaluation\n",
-"Evaluate how well the model classifies a batch of data.\n",
-"\n",
-"If the number of examples in the dataset is not evenly divisible by the batch size, this implementation ignores the remainder. This is fine for large datasets.\n",
+"Evaluate the loss and accuracy of the model for a given dataset.\n",
 "\n",
 "You do not need to modify this section."
 ]
@@ -267,6 +308,9 @@
 },
 "outputs": [],
 "source": [
+"correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_y, 1))\n",
+"accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n",
+"\n",
 "def evaluate(X_data, y_data):\n",
 "    num_examples = len(X_data)\n",
 "    total_accuracy, total_loss = 0, 0\n",
