
Commit 53a3e68

Refactoring the propagation of data
1 parent 141eadc commit 53a3e68

4 files changed: +68 -36 lines changed


code/backprop2.py

Lines changed: 25 additions & 1 deletion
@@ -74,7 +74,7 @@ def SGD(self, training_data, epochs, mini_batch_size, eta,
             print "Epoch %s: %s" % (
                 j,
                 sum(self.cost_cross_entropy(x, y)
-                    for (x, y) in training_data[:1000]))
+                    for (x, y) in training_data))
 
     def backprop(self, training_data, T, eta, lmbda,
                  cost, cost_derivative):
@@ -167,6 +167,30 @@ def evaluate_training_results(self, training_data):
         return sum(int(x == y)
                    for x, y in zip(training_results, actual_training_results))
 
+    def initial_feedforward(self, input_data, j):
+        """
+        Feedforward the elements ``x`` in the list ``input_data``
+        through the network until the ``j``th layer. Return the list
+        of activations from the ``j``th layer.
+        """
+        for k in range(j):
+            intermediate_data = [
+                sigmoid_vec(np.dot(self.weights[k], x)+self.biases[k])
+                for x in input_data]
+        return intermediate_data
+
+    def final_feedforward(self, intermediate_data, j):
+        """
+        Feedforward the elements ``x`` in the list
+        ``intermediate_data`` through the network to the output. The
+        elements in ``intermediate_data`` are assumed to be inputs to
+        the ``j``th layer."""
+        for k in range(j, len(self.weights)):
+            output_data = [
+                sigmoid_vec(np.dot(self.weights[k], a)+self.biases[k])
+                for a in intermediate_data]
+        return output_data
+
 #### Miscellaneous functions
 def minimal_cross_entropy(training_data):
     """

code/blog/common_knowledge.py

Lines changed: 23 additions & 5 deletions
@@ -13,14 +13,17 @@
 import mnist_loader
 
 # Third-party libraries
+import matplotlib
+import matplotlib.pyplot as plt
 import numpy as np
 
 
 #### Parameters
 # Size of the training sets. May range from 1000 to 12,500. Lower
 # will be faster, higher will give more accuracy.
 SIZE = 5000
-
+# Number of hidden units in the autoencoder
+HIDDEN = 30
 
 print "\nGenerating training data"
 training_data, _, _ = mnist_loader.load_data_nn()
@@ -30,11 +33,11 @@
 test = [x for x, _ in training_data[37500:37500+SIZE]]
 
 print "\nFinding first autoencoder"
-ae_1 = Network([784, 30, 784])
+ae_1 = Network([784, HIDDEN, 784])
 ae_1.SGD(td_1, 4, 10, 0.01, 0.05)
 
 print "\nFinding second autoencoder"
-ae_2 = Network([784, 30, 784])
+ae_2 = Network([784, HIDDEN, 784])
 ae_2.SGD(td_1, 4, 10, 0.01, 0.05)
 
 print "\nGenerating encoded training data"
@@ -45,7 +48,7 @@
 encoded_training_data = zip(encoded_td_1, encoded_td_2)
 
 print "\nFinding mapping between theories"
-net = Network([30, 60, 30])
+net = Network([HIDDEN, HIDDEN])
 net.SGD(encoded_training_data, 6, 10, 0.01, 0.05)
 
 print """\nBaseline for comparison: decompress with the first autoencoder"""
@@ -55,7 +58,7 @@
 encoded_test_2 = [sigmoid_vec(np.dot(ae_2.weights[0], x)+ae_2.biases[0])
                   for x in test]
 test_data = zip(encoded_test_1, encoded_test_2)
-net_baseline = Network([30, 784, 30])
+net_baseline = Network([HIDDEN, 784, HIDDEN])
 net_baseline.biases[0] = ae_1.biases[1]
 net_baseline.weights[0] = ae_1.weights[1]
 net_baseline.biases[1] = ae_2.biases[0]
@@ -70,3 +73,18 @@
 error = sum(np.linalg.norm(net.feedforward(x)-y, 1) for (x, y) in test_data)
 print "Average l1 error per training image: %s" % (error / SIZE,)
 
+print "\nComputing fiducial image inputs"
+fiducial_images_1 = [
+    ae_1.weights[0][j,:].reshape(28,28)/np.linalg.norm(net.weights[0][j,:])
+    for j in range(HIDDEN)]
+fiducial_images_2 = [
+    ae_2.weights[0][j,:].reshape(28,28)/np.linalg.norm(net.weights[0][j,:])
+    for j in range(HIDDEN)]
+image = np.concatenate([np.concatenate(fiducial_images_1, axis=1),
+                        np.concatenate(fiducial_images_2, axis=1)])
+fig = plt.figure()
+ax = fig.add_subplot(111)
+ax.matshow(image, cmap = matplotlib.cm.binary)
+plt.xticks(np.array([]))
+plt.yticks(np.array([]))
+plt.show()
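The plotting block added at the end of common_knowledge.py draws one "fiducial" image per hidden unit: a row of first-layer weights, rescaled and reshaped back to 28x28, with the two autoencoders' images stacked as two rows of a single grid. A small self-contained sketch of that tiling step, with random arrays standing in for the trained weights:

import numpy as np
import matplotlib
import matplotlib.pyplot as plt

HIDDEN = 30

# Stand-ins for the first-layer weights of the two trained autoencoders,
# one 784-dimensional row per hidden unit.
weights_1 = np.random.randn(HIDDEN, 784)
weights_2 = np.random.randn(HIDDEN, 784)

def fiducial_images(weights):
    # One 28x28 image per hidden unit: the unit's incoming weight
    # vector, rescaled to unit norm and reshaped to image dimensions.
    return [weights[j, :].reshape(28, 28)/np.linalg.norm(weights[j, :])
            for j in range(HIDDEN)]

# Tile the images: one row per autoencoder, one column per hidden unit.
image = np.concatenate([np.concatenate(fiducial_images(weights_1), axis=1),
                        np.concatenate(fiducial_images(weights_2), axis=1)])

fig = plt.figure()
ax = fig.add_subplot(111)
ax.matshow(image, cmap=matplotlib.cm.binary)
plt.xticks(np.array([]))
plt.yticks(np.array([]))
plt.show()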

code/deep_autoencoder.py

Lines changed: 0 additions & 23 deletions
@@ -103,29 +103,6 @@ def train_nested_autoencoder_repl(
             j, double(self.initial_feedforward(training_data, j)),
             epochs, mini_batch_size, eta, lmbda)
 
-    def initial_feedforward(self, training_data, j):
-        """
-        Feedforward the elements ``x`` in ``training_data`` through
-        the network until the ``j``th layer. Return the list of
-        activations.
-        """
-        for k in range(j):
-            training_data = [
-                sigmoid_vec(np.dot(self.weights[k], x)+self.biases[k])
-                for x in training_data]
-        return training_data
-
-    def final_feedforward(self, data, j):
-        """
-        Feedforward the elements ``x`` in ``data`` through the network
-        to the output. The elements in ``data`` are assumed to be
-        inputs to the ``j``th layer."""
-        for k in range(j, len(self.weights)):
-            data = [
-                sigmoid_vec(np.dot(self.weights[k], a)+self.biases[k])
-                for a in data]
-        return data
-
     def feature(self, j, k):
         """
         Return the output if neuron number ``k`` in layer ``j`` is
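The removed methods now live on the Network class in backprop2.py, and ``train_nested_autoencoder_repl`` keeps using them through the ``double(self.initial_feedforward(training_data, j))`` call shown in the context above. A rough sketch of that pattern, assuming ``double`` simply pairs each activation with itself so layer ``j`` is trained to reconstruct its own input:

def double(data):
    # Pair each activation with itself, producing (input, target)
    # examples for training a single layer as an autoencoder.
    return [(x, x) for x in data]

# Hypothetical layer-wise pretraining step: push the raw inputs up to
# layer j, then train layer j to reproduce its own input activations.
# layer_training_data = double(net.initial_feedforward(training_data, j))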

code/mnist_autoencoder.py

Lines changed: 20 additions & 7 deletions
@@ -33,7 +33,7 @@ def train_autoencoder(hidden_units, training_data):
     "Return a trained autoencoder."
     autoencoder_training_data = [(x, x) for x, _ in training_data]
     net = Network([784, hidden_units, 784])
-    net.SGD(autoencoder_training_data, 6, 10, 0.007, 0.05)
+    net.SGD(autoencoder_training_data, 6, 10, 0.01, 0.05)
     return net
 
 def plot_test_results(net, test_inputs, actual_test_results):
@@ -53,18 +53,31 @@ def plot_test_results(net, test_inputs, actual_test_results):
     plt.yticks(np.array([]))
     plt.show()
 
-def classifier(hidden_units):
+def classifier(hidden_units, n_unlabeled_inputs, n_labeled_inputs):
     """
-    Train an autoencoder using the MNIST training data, and then use
-    the autoencoder to create a classifier with a single hidden layer.
+    Train a semi-supervised classifier. We begin with pretraining,
+    creating an autoencoder which uses ``n_unlabeled_inputs`` from the
+    MNIST training data. This is then converted into a classifier
+    which is fine-tuned using the ``n_labeled_inputs``.
+
+    For comparison a classifier is also created which does not make
+    use of the unlabeled data.
     """
     training_data, test_inputs, actual_test_results = \
         mnist_loader.load_data_nn()
-    net_ae = train_autoencoder(hidden_units, training_data)
+    print "\nUsing pretraining and %s items of unlabeled data" %\
+        n_unlabeled_inputs
+    net_ae = train_autoencoder(hidden_units, training_data[:n_unlabeled_inputs])
     net_c = Network([784, hidden_units, 10])
     net_c.biases = net_ae.biases[:1]+[np.random.randn(10, 1)/np.sqrt(10)]
     net_c.weights = net_ae.weights[:1]+\
         [np.random.randn(10, hidden_units)/np.sqrt(10)]
-    net_c.SGD(training_data, 6, 10, 0.007, 0.05)
-    print net_c.evaluate(test_inputs, actual_test_results)
+    net_c.SGD(training_data[-n_labeled_inputs:], 300, 10, 0.01, 0.05)
+    print "Result on test data: %s / %s" % (
+        net_c.evaluate(test_inputs, actual_test_results), len(test_inputs))
+    print "Training a network with %s items of training data" % n_labeled_inputs
+    net = Network([784, hidden_units, 10])
+    net.SGD(training_data[-n_labeled_inputs:], 300, 10, 0.01, 0.05)
+    print "Result on test data: %s / %s" % (
+        net.evaluate(test_inputs, actual_test_results), len(test_inputs))
     return net_c
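The semi-supervised path in ``classifier`` reuses the pretrained autoencoder's first layer as the hidden layer of a new classifier and stacks a freshly initialized 10-way output layer on top, before fine-tuning on the labeled examples alone. A minimal sketch of that weight transplant, with plain numpy arrays standing in for the Network parameters:

import numpy as np

hidden_units = 30

# Stand-ins for a pretrained [784, hidden_units, 784] autoencoder's
# parameters: weights[0]/biases[0] encode, weights[1]/biases[1] decode.
ae_weights = [np.random.randn(hidden_units, 784),
              np.random.randn(784, hidden_units)]
ae_biases = [np.random.randn(hidden_units, 1),
             np.random.randn(784, 1)]

# Keep only the encoder half, and add a new 10-way output layer whose
# weights and biases are scaled down by sqrt(10) to start small.
classifier_weights = ae_weights[:1]+[np.random.randn(10, hidden_units)/np.sqrt(10)]
classifier_biases = ae_biases[:1]+[np.random.randn(10, 1)/np.sqrt(10)]

# Fine-tuning would then run SGD on the labeled slice only, e.g.
# training_data[-n_labeled_inputs:] as in the function above.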
