"""
mnist_loader
~~~~~~~~~~~~

A library to load the MNIST image data. For details of the data
structures that are returned, see the doc strings for ``load_data``
and ``load_data_wrapper``. In practice, ``load_data_wrapper`` is the
function usually called by our neural network code.
"""

#### Libraries
# Standard library
import cPickle
import gzip

# Third-party libraries
import numpy as np

def load_data():
    """Return the MNIST data as a tuple containing the training data,
    the validation data, and the test data.

    The ``training_data`` is returned as a tuple with two entries.
    The first entry contains the actual training images. This is a
    numpy ndarray with 50,000 entries. Each entry is, in turn, a
    numpy ndarray with 784 values, representing the 28 * 28 = 784
    pixels in a single MNIST image.

    The second entry in the ``training_data`` tuple is a numpy ndarray
    containing 50,000 entries. Those entries are just the digit
    values (0...9) for the corresponding images contained in the first
    entry of the tuple.

    The ``validation_data`` and ``test_data`` are similar, except
    each contains only 10,000 images.

    This is a nice data format, but for use in neural networks it's
    helpful to modify the format of the ``training_data`` a little.
    That's done in the wrapper function ``load_data_wrapper()``, see
    below.
    """
    f = gzip.open('../data/mnist.pkl.gz', 'rb')
    training_data, validation_data, test_data = cPickle.load(f)
    f.close()
    return (training_data, validation_data, test_data)
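
# A minimal usage sketch (not part of the original loader): assuming the
# gzipped pickle is present at ``../data/mnist.pkl.gz``, the raw format
# returned by ``load_data`` can be inspected like this:
#
#     training_data, validation_data, test_data = load_data()
#     training_data[0].shape   # (50000, 784) -- the images
#     training_data[1].shape   # (50000,)     -- the digit labels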

def load_data_wrapper():
    """Return a tuple containing ``(training_data, validation_data,
    test_data)``. Based on ``load_data``, but the format is more
    convenient for use in our implementation of neural networks.

    In particular, ``training_data`` is a list containing 50,000
    2-tuples ``(x, y)``. ``x`` is a 784-dimensional numpy.ndarray
    containing the input image. ``y`` is a 10-dimensional
    numpy.ndarray representing the unit vector corresponding to the
    correct digit for ``x``.

    ``validation_data`` and ``test_data`` are lists containing 10,000
    2-tuples ``(x, y)``. In each case, ``x`` is a 784-dimensional
    numpy.ndarray containing the input image, and ``y`` is the
    corresponding classification, i.e., the digit value (an integer)
    corresponding to ``x``.

    Obviously, this means we're using slightly different formats for
    the training data and the validation / test data. These formats
    turn out to be the most convenient for use in our neural network
    code."""
    tr_d, va_d, te_d = load_data()
    training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
    training_results = [vectorized_result(y) for y in tr_d[1]]
    training_data = zip(training_inputs, training_results)
    validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
    validation_data = zip(validation_inputs, va_d[1])
    test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
    test_data = zip(test_inputs, te_d[1])
    return (training_data, validation_data, test_data)
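
# A quick sketch of the wrapped format (illustrative, not part of the
# original code): each training example pairs an input column vector
# with a one-hot target, while validation/test examples pair an input
# column vector with an integer label:
#
#     training_data, validation_data, test_data = load_data_wrapper()
#     x, y = training_data[0]
#     x.shape   # (784, 1)
#     y.shape   # (10, 1)
#     x, y = test_data[0]
#     x.shape   # (784, 1); here y is just an int in 0...9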

def vectorized_result(j):
    """Return a 10-dimensional unit vector with a 1.0 in the jth
    position and zeroes elsewhere. This is used to convert a digit
    (0...9) into a corresponding desired output from the neural
    network."""
    e = np.zeros((10, 1))
    e[j] = 1.0
    return e
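
# Optional self-check, not part of the original loader. It assumes the
# MNIST pickle is available at ``../data/mnist.pkl.gz`` and that the
# module runs under Python 2, since the code above relies on ``cPickle``
# and on ``zip`` returning lists.
if __name__ == "__main__":
    training_data, validation_data, test_data = load_data_wrapper()
    print "training examples:  ", len(training_data)    # expect 50000
    print "validation examples:", len(validation_data)  # expect 10000
    print "test examples:      ", len(test_data)        # expect 10000
    x, y = training_data[0]
    print "input shape: ", x.shape   # expect (784, 1)
    print "target shape:", y.shape   # expect (10, 1)
    print "vectorized_result(3):", vectorized_result(3).ravel()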