
Commit b873028

Added plotting function to nntrain. Removed normalizing code from nntrain.
1 parent b09f3eb commit b873028
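With this change, input normalization is handled by the caller instead of inside nntrain and nnpredict. A minimal sketch of the updated workflow, mirroring the zscore/normalize lines this commit adds to the README and tests (mu and sigma are the training-set statistics; normalize is the toolbox helper used in those files):

```matlab
% load and scale MNIST as in the examples
load mnist_uint8;
train_x = double(train_x) / 255;
test_x  = double(test_x)  / 255;

% normalize explicitly -- nntrain no longer does this internally
[train_x, mu, sigma] = zscore(train_x);   % training-set mean and std
test_x = normalize(test_x, mu, sigma);    % apply the same statistics to the test set
```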

11 files changed, +80 −105 lines


NN/nneval.m

Lines changed: 1 addition & 3 deletions
@@ -7,12 +7,10 @@
     nn = nnff(nn, train_x, train_y);
     loss.train.e(end + 1) = nn.L;
 
+    % validation performance
     if nargin == 6
-
-        % validation performance
         nn = nnff(nn, val_x, val_y);
         loss.val.e(end + 1) = nn.L;
-
     end
 
     %calc misclassification rate if softmax

NN/nnpredict.m

Lines changed: 0 additions & 4 deletions
@@ -1,8 +1,4 @@
 function labels = nnpredict(nn, x)
-    if nn.normalize_input==1;
-        x = (x-repmat(nn.normalizeMean,size(x,1),1))./repmat(nn.normalizeStd,size(x,1),1);
-    end
-
     nn.testing = 1;
     nn = nnff(nn, x, zeros(size(x,1), nn.size(end)));
     nn.testing = 0;
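Because nnpredict no longer normalizes its input, new data has to be normalized with the training-set statistics before prediction. A hedged sketch, assuming mu and sigma were returned by zscore on the training data as in the updated tests:

```matlab
% x: raw samples to classify; mu/sigma: statistics saved from training
x = normalize(x, mu, sigma);   % same helper the updated tests apply to test_x
labels = nnpredict(nn, x);     % nnpredict now expects pre-normalized input
```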

NN/nnsetup.m

Lines changed: 0 additions & 1 deletion
@@ -6,7 +6,6 @@
     nn.size = architecture;
     nn.n = numel(nn.size);
 
-    nn.normalize_input = 1; % normalize input elements to be between [-1 1]. Note: use a linear output function if training auto-encoders with normalized inputs
     nn.activation_function = 'tanh_opt'; % Activation functions of hidden layers: 'sigm' (sigmoid) or 'tanh_opt' (optimal tanh).
     nn.learningRate = 2; % learning rate Note: typically needs to be lower when using 'sigm' activation function and non-normalized inputs.
     nn.momentum = 0.5; % Momentum

NN/nntrain.m

Lines changed: 9 additions & 25 deletions
@@ -11,34 +11,20 @@
 
     loss.train.e = [];
     loss.train.e_frac = [];
-    if nargin == 4 % training data
-        opts.validation = 0;
-
-    else %training data and validation data
+    loss.val.e = [];
+    loss.val.e_frac = [];
+    opts.validation = 0;
+    if nargin == 6
         opts.validation = 1;
-        loss.val.e = [];
-        loss.val.e_frac = [];
     end
 
-    if ~isfield(opts,'plot')
-        fhandle = [];
-    elseif opts.plot == 1
+    fhandle = [];
+    if isfield(opts,'plot') && opts.plot == 1
         fhandle = figure();
-    else
-        fhandle = [];
     end
 
-
     m = size(train_x, 1);
 
-    if nn.normalize_input==1
-        [train_x, mu, sigma] = zscore(train_x);
-        nn.normalizeMean = mu;
-        sigma(sigma==0) = 0.0001;%this should be very small value.
-        nn.normalizeStd = sigma;
-    end
-
-
     batchsize = opts.batchsize;
     numepochs = opts.numepochs;
 
@@ -74,14 +60,12 @@
         t = toc;
 
         if ishandle(fhandle)
-
             if opts.validation == 1
-                loss = nneval(nn,loss,train_x,train_y,val_x,val_y);
+                loss = nneval(nn, loss, train_x, train_y, val_x, val_y);
             else
-                loss = nneval(nn,loss,train_x,train_y);
+                loss = nneval(nn, loss, train_x, train_y);
             end
-
-            nnupdatefigures(nn,fhandle,loss,opts,i);
+            nnupdatefigures(nn, fhandle, loss, opts, i);
         end
 
         disp(['epoch ' num2str(i) '/' num2str(opts.numepochs) '. Took ' num2str(t) ' seconds' '. Mean squared error on training set is ' num2str(mean(L((n-numbatches):(n-1))))]);
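The new plotting path is driven by opts.plot and, optionally, a validation set passed as the last two arguments of nntrain. A short usage sketch taken from the ex5/ex6 examples added further down in this commit (tx/ty and vx/vy are a training/validation split of the training data):

```matlab
opts.numepochs = 5;                     % full sweeps through the data
opts.batchsize = 1000;                  % samples per gradient step
opts.plot      = 1;                     % open a figure and update it each epoch
nn = nntrain(nn, tx, ty, opts, vx, vy); % omit vx, vy to plot training error only
```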

NN/nnupdatefigures.m

Lines changed: 1 addition & 10 deletions
@@ -11,32 +11,25 @@ function nnupdatefigures(nn,fhandle,L,opts,i)
 
     %create data for plots
     if strcmp(nn.output,'softmax')
-
         plot_x = x_ax';
         plot_ye = L.train.e';
         plot_yfrac = L.train.e_frac';
 
     else
-
         plot_x = x_ax';
         plot_ye = L.train.e';
-
     end
 
     %add error on validation data if present
     if opts.validation == 1
-
         plot_x = [plot_x, x_ax'];
         plot_ye = [plot_ye,L.val.e'];
-
     end
 
 
     %add classification error on validation data if present
     if opts.validation == 1 && strcmp(nn.output,'softmax')
-
-        plot_yfrac = [plot_yfrac, L.val.e_frac'];
-
+        plot_yfrac = [plot_yfrac, L.val.e_frac'];
     end
 
     % plotting
@@ -54,7 +47,6 @@ function nnupdatefigures(nn,fhandle,L,opts,i)
             set(gca,'LegendColorbarListeners',[]);
             setappdata(gca,'LegendColorbarManualSpace',1);
             setappdata(gca,'LegendColorbarReclaimSpace',1);
-
         end
 
         p2 = subplot(1,2,2);
@@ -68,7 +60,6 @@ function nnupdatefigures(nn,fhandle,L,opts,i)
             set(gca,'LegendColorbarListeners',[]);
            setappdata(gca,'LegendColorbarManualSpace',1);
             setappdata(gca,'LegendColorbarReclaimSpace',1);
-
         end
 
     else

README.md

Lines changed: 37 additions & 9 deletions
@@ -91,7 +91,6 @@ dbn = dbntrain(dbn, train_x, opts);
 
 %unfold dbn to nn
 nn = dbnunfoldtonn(dbn, 10);
-nn.normalize_input = 0;
 nn.activation_function = 'sigm';
 
 %train nn
@@ -121,7 +120,6 @@ test_y = double(test_y);
 % Setup and train a stacked denoising autoencoder (SDAE)
 rng(0);
 sae = saesetup([784 100]);
-sae.ae{1}.normalize_input = 0;
 sae.ae{1}.activation_function = 'sigm';
 sae.ae{1}.learningRate = 1;
 sae.ae{1}.inputZeroMaskedFraction = 0.5;
@@ -132,7 +130,6 @@ visualize(sae.ae{1}.W{1}(:,2:end)')
 
 % Use the SDAE to initialize a FFNN
 nn = nnsetup([784 100 10]);
-nn.normalize_input = 0;
 nn.activation_function = 'sigm';
 nn.learningRate = 1;
 nn.W{1} = sae.ae{1}.W{1};
@@ -148,12 +145,10 @@ assert(er < 0.16, 'Too big error');
 % Setup and train a stacked denoising autoencoder (SDAE)
 rng(0);
 sae = saesetup([784 100 100]);
-sae.ae{1}.normalize_input = 0;
 sae.ae{1}.activation_function = 'sigm';
 sae.ae{1}.learningRate = 1;
 sae.ae{1}.inputZeroMaskedFraction = 0.5;
 
-sae.ae{2}.normalize_input = 0;
 sae.ae{2}.activation_function = 'sigm';
 sae.ae{2}.learningRate = 1;
 sae.ae{2}.inputZeroMaskedFraction = 0.5;
@@ -165,7 +160,6 @@ visualize(sae.ae{1}.W{1}(:,2:end)')
 
 % Use the SDAE to initialize a FFNN
 nn = nnsetup([784 100 100 10]);
-nn.normalize_input = 0;
 nn.activation_function = 'sigm';
 nn.learningRate = 1;
 
@@ -236,6 +230,10 @@ test_x = double(test_x) / 255;
 train_y = double(train_y);
 test_y = double(test_y);
 
+% normalize
+[train_x, mu, sigma] = zscore(train_x);
+test_x = normalize(test_x, mu, sigma);
+
 %% ex1 vanilla neural net
 rng(0);
 nn = nnsetup([784 100 10]);
@@ -282,18 +280,48 @@ nn = nntrain(nn, train_x, train_y, opts);
 [er, bad] = nntest(nn, test_x, test_y);
 assert(er < 0.1, 'Too big error');
 
-%% ex4 neural net with sigmoid activation function, and without normalizing inputs
+%% ex4 neural net with sigmoid activation function
 rng(0);
 nn = nnsetup([784 100 10]);
 
 nn.activation_function = 'sigm'; % Sigmoid activation function
-nn.normalize_input = 0; % Don't normalize inputs
-nn.learningRate = 1; % Sigm and non-normalized inputs require a lower learning rate
+nn.learningRate = 1; % Sigm require a lower learning rate
 opts.numepochs = 1; % Number of full sweeps through data
 opts.batchsize = 100; % Take a mean gradient step over this many samples
 
 nn = nntrain(nn, train_x, train_y, opts);
 
+[er, bad] = nntest(nn, test_x, test_y);
+assert(er < 0.1, 'Too big error');
+
+%% ex5 plotting functionality
+rng(0);
+nn = nnsetup([784 20 10]);
+opts.numepochs = 5; % Number of full sweeps through data
+nn.output = 'softmax'; % use softmax output
+opts.batchsize = 1000; % Take a mean gradient step over this many samples
+opts.plot = 1; % enable plotting
+
+nn = nntrain(nn, train_x, train_y, opts);
+
+[er, bad] = nntest(nn, test_x, test_y);
+assert(er < 0.1, 'Too big error');
+
+%% ex6 neural net with sigmoid activation and plotting of validation and training error
+% split training data into training and validation data
+vx = train_x(1:10000,:);
+tx = train_x(10001:end,:);
+vy = train_y(1:10000,:);
+ty = train_y(10001:end,:);
+
+rng(0);
+nn = nnsetup([784 20 10]);
+nn.output = 'softmax'; % use softmax output
+opts.numepochs = 5; % Number of full sweeps through data
+opts.batchsize = 1000; % Take a mean gradient step over this many samples
+opts.plot = 1; % enable plotting
+nn = nntrain(nn, tx, ty, opts, vx, vy); % nntrain takes validation set as last two arguments (optionally)
+
 [er, bad] = nntest(nn, test_x, test_y);
 assert(er < 0.1, 'Too big error');
 ```

tests/test_example_DBN.m

Lines changed: 0 additions & 1 deletion
@@ -30,7 +30,6 @@
 
 %unfold dbn to nn
 nn = dbnunfoldtonn(dbn, 10);
-nn.normalize_input = 0;
 nn.activation_function = 'sigm';
 
 %train nn

tests/test_example_NN.m

Lines changed: 21 additions & 32 deletions
@@ -6,6 +6,10 @@
 train_y = double(train_y);
 test_y = double(test_y);
 
+% normalize
+[train_x, mu, sigma] = zscore(train_x);
+test_x = normalize(test_x, mu, sigma);
+
 %% ex1 vanilla neural net
 rng(0);
 nn = nnsetup([784 100 10]);
@@ -52,13 +56,12 @@
 [er, bad] = nntest(nn, test_x, test_y);
 assert(er < 0.1, 'Too big error');
 
-%% ex4 neural net with sigmoid activation function, and without normalizing inputs
+%% ex4 neural net with sigmoid activation function
 rng(0);
 nn = nnsetup([784 100 10]);
 
 nn.activation_function = 'sigm'; % Sigmoid activation function
-nn.normalize_input = 0; % Don't normalize inputs
-nn.learningRate = 1; % Sigm and non-normalized inputs require a lower learning rate
+nn.learningRate = 1; % Sigm require a lower learning rate
 opts.numepochs = 1; % Number of full sweeps through data
 opts.batchsize = 100; % Take a mean gradient step over this many samples
 
@@ -70,44 +73,30 @@
 %% ex5 plotting functionality
 rng(0);
 nn = nnsetup([784 20 10]);
-nn.normalize_input = 1;
-nn.activation_function = 'tanh_opt'; % tanh activation function
-nn.normalize_input = 1; % Normalize inputs to range [-1,1]
-nn.learningRate = 0.1; % Decrearse learning rate to get smooth decrease in errors
-opts.numepochs = 20; % Number of full sweeps through data
-opts.batchsize = 100; % Take a mean gradient step over this many samples
-opts.plot = 1; % enable plotting
+opts.numepochs = 5; % Number of full sweeps through data
+nn.output = 'softmax'; % use softmax output
+opts.batchsize = 1000; % Take a mean gradient step over this many samples
+opts.plot = 1; % enable plotting
 
 nn = nntrain(nn, train_x, train_y, opts);
 
 [er, bad] = nntest(nn, test_x, test_y);
 assert(er < 0.1, 'Too big error');
 
 %% ex6 neural net with sigmoid activation and plotting of validation and training error
-
-% create train,val and test set
-% splits: 10000-10000-50000
-load mnist_uint8;
-
-val_x = double(train_x(1:10000,:)) / 255;
-train_x = double(train_x(10001:end,:)) / 255;
-test_x = double(test_x)/255;
-
-val_y = double(train_y(1:10000,:));
-train_y = double(train_y(10001:end,:));
-test_y = double(test_y);
-
+% split training data into training and validation data
+vx = train_x(1:10000,:);
+tx = train_x(10001:end,:);
+vy = train_y(1:10000,:);
+ty = train_y(10001:end,:);
 
 rng(0);
 nn = nnsetup([784 20 10]);
-nn.normalize_input = 0; % dont normalize because we use sigmoid activation
-nn.activation_function = 'sigm'; % use sigmoid activation
-nn.output = 'softmax'; % use softmax output
-nn.learningRate = 0.1; % Decrease learning rate, otherwise the errors does not decrease nicely
-opts.numepochs = 20; % Number of full sweeps through data
-opts.batchsize = 100; % Take a mean gradient step over this many samples
-opts.plot = 1; % enable plotting
-nn = nntrain(nn, train_x, train_y, opts,val_x,val_y); % nntrain takes validation set as last two arguments (optionally)
+nn.output = 'softmax'; % use softmax output
+opts.numepochs = 5; % Number of full sweeps through data
+opts.batchsize = 1000; % Take a mean gradient step over this many samples
+opts.plot = 1; % enable plotting
+nn = nntrain(nn, tx, ty, opts, vx, vy); % nntrain takes validation set as last two arguments (optionally)
 
 [er, bad] = nntest(nn, test_x, test_y);
-assert(er < 0.06, 'Too big error');
+assert(er < 0.1, 'Too big error');

tests/test_example_SAE.m

Lines changed: 0 additions & 5 deletions
@@ -10,7 +10,6 @@
 % Setup and train a stacked denoising autoencoder (SDAE)
 rng(0);
 sae = saesetup([784 100]);
-sae.ae{1}.normalize_input = 0;
 sae.ae{1}.activation_function = 'sigm';
 sae.ae{1}.learningRate = 1;
 sae.ae{1}.inputZeroMaskedFraction = 0.5;
@@ -21,7 +20,6 @@
 
 % Use the SDAE to initialize a FFNN
 nn = nnsetup([784 100 10]);
-nn.normalize_input = 0;
 nn.activation_function = 'sigm';
 nn.learningRate = 1;
 nn.W{1} = sae.ae{1}.W{1};
@@ -37,12 +35,10 @@
 % Setup and train a stacked denoising autoencoder (SDAE)
 rng(0);
 sae = saesetup([784 100 100]);
-sae.ae{1}.normalize_input = 0;
 sae.ae{1}.activation_function = 'sigm';
 sae.ae{1}.learningRate = 1;
 sae.ae{1}.inputZeroMaskedFraction = 0.5;
 
-sae.ae{2}.normalize_input = 0;
 sae.ae{2}.activation_function = 'sigm';
 sae.ae{2}.learningRate = 1;
 sae.ae{2}.inputZeroMaskedFraction = 0.5;
@@ -54,7 +50,6 @@
 
 % Use the SDAE to initialize a FFNN
 nn = nnsetup([784 100 100 10]);
-nn.normalize_input = 0;
 nn.activation_function = 'sigm';
 nn.learningRate = 1;
 
