Rolled back some commits.

albertoandreottiATgmail · albertoandreottiATgmail · commit 17fc78716d53 · 2013-10-22T13:44:36.000-03:00
diff --git a/CNN/cnnavg.m b/CNN/cnnavg.m
@@ -0,0 +1,15 @@
+function avgnet = cnnavg(avgnet, net)
+% CNNAVG  Replace the parameters of AVGNET by the mean of AVGNET's and NET's.
+%   avgnet - network whose parameters are updated and returned
+%   net    - network averaged into avgnet; also supplies the layer list and
+%            the map counts (numel of each layer's .a) that size the loops
+%   Only convolution layers (type 'c') are visited: their kernels k{ii}{j}
+%   and biases b{j} are averaged, as are the output weights ffW and ffb.
+    for l = 2 : numel(net.layers)
+        if strcmp(net.layers{l}.type, 'c')
+            for j = 1 : numel(net.layers{l}.a)
+                for ii = 1 : numel(net.layers{l - 1}.a)
+                    avgnet.layers{l}.k{ii}{j} = (avgnet.layers{l}.k{ii}{j} + net.layers{l}.k{ii}{j}) / 2;
+                end
+                % Biases are averaged like every other parameter; they were
+                % previously summed, doubling them on each merge.
+                avgnet.layers{l}.b{j} = (avgnet.layers{l}.b{j} + net.layers{l}.b{j}) / 2;
+            end
+        end
+    end
+
+    avgnet.ffW = (avgnet.ffW + net.ffW) / 2;
+    avgnet.ffb = (avgnet.ffb + net.ffb) / 2;
+end
diff --git a/CNN/cnncopy.m b/CNN/cnncopy.m
@@ -0,0 +1,18 @@
+function cpnet = cnncopy(cpnet, net)
+% CNNCOPY  Overwrite the parameters of CPNET with those of NET.
+%   For every convolution layer (type 'c') of net, the kernels k{in}{out}
+%   and biases b{out} are copied; the output weights ffW and ffb are copied
+%   as well. All other fields of cpnet are left as they were.
+%   The number of maps per layer is taken from numel of net's .a cells.
+    layerIdx = 2;
+    while layerIdx <= numel(net.layers)
+        if strcmp(net.layers{layerIdx}.type, 'c')
+            for outIdx = 1 : numel(net.layers{layerIdx}.a)
+                for inIdx = 1 : numel(net.layers{layerIdx - 1}.a)
+                    cpnet.layers{layerIdx}.k{inIdx}{outIdx} = net.layers{layerIdx}.k{inIdx}{outIdx};
+                end
+                cpnet.layers{layerIdx}.b{outIdx} = net.layers{layerIdx}.b{outIdx};
+            end
+        end
+        layerIdx = layerIdx + 1;
+    end
+
+    % Output layer parameters.
+    cpnet.ffb = net.ffb;
+    cpnet.ffW = net.ffW;
+end
diff --git a/CNN/cnntrain.m b/CNN/cnntrain.m
@@ -1,27 +1,29 @@
+
 function net = cnntrain(net, x, y, opts)
+% CNNTRAIN  Run opts.numepochs epochs over (x, y), farming batches out to workers.
+%   net  - network passed to every worker as the shared starting point
+%   x, y - training inputs / targets; the sample count is size(x, 3)
+%   opts - options; opts.batchsize and opts.numepochs are read here and the
+%          whole struct is forwarded to process_batch
+%   Raises an error when the sample count is not a multiple of the batch
+%   size, or the batch count is not a multiple of the worker count.
     m = size(x, 3);
     numbatches = m / opts.batchsize;
     if rem(numbatches, 1) ~= 0
         error('numbatches not integer');
     end
+
     net.rL = [];
     for i = 1 : opts.numepochs
         disp(['epoch ' num2str(i) '/' num2str(opts.numepochs)]);
         tic;
         kk = randperm(m);
-        for l = 1 : numbatches
-            batch_x = x(:, :, kk((l - 1) * opts.batchsize + 1 : l * opts.batchsize));
-            batch_y = y(:,    kk((l - 1) * opts.batchsize + 1 : l * opts.batchsize));
+        % how many processes?
+        numWorkers = 4;
+        pids = 0 : numWorkers - 1;
+
+        % Every worker gets an equal, contiguous run of batches. A fractional
+        % share would turn into fractional batch indices inside the workers,
+        % so reject it up front instead of failing there.
+        perWorker = numbatches / numWorkers;
+        if rem(perWorker, 1) ~= 0
+            error('numbatches not divisible by numWorkers');
+        end
+        starts = pids * perWorker;
+
+        % pid that process_batch lets merge into the shared net first
+        turn = 0;
 
-            net = cnnff(net, batch_x);
-            net = cnnbp(net, batch_y);
-            net = cnnapplygrads(net, opts);
-            if isempty(net.rL)
-                net.rL(1) = net.L;
-            end
-            net.rL(end + 1) = 0.99 * net.rL(end) + 0.01 * net.L;
-        end
+        % NOTE(review): nothing returned by the workers is captured here, so
+        % net and net.rL leave this function as they entered the epoch.
+        % Confirm how the trained parameters are meant to be collected.
+        pararrayfun(numWorkers, ...
+                    @(starts, pids) process_batch(x, y, kk, net, turn, starts, perWorker, pids, numWorkers, opts), ...
+                    starts, ...
+                    pids, ...
+                    "ErrorHandler", @eh);
         toc;
     end
-    
 end
diff --git a/CNN/convadd.m b/CNN/convadd.m
@@ -0,0 +1,8 @@
+%CONVADD  Sum the 2-D 'valid' convolutions of one worker's chunk of slices.
+%   a         - image stack, sliced along dim 3
+%   k         - kernel stack, sliced along dim 3 in step with a
+%   m         - side length of the square m-by-m result; convn_valid passes
+%               size(a, 1) - size(k, 1) + 1
+%   pid       - zero-based worker id; selects slices
+%               pid*chunkSize + 1 .. (pid + 1)*chunkSize
+%   chunkSize - number of slices handled by each worker
+function result = convadd(a, k, m, pid, chunkSize)
+    init = pid * chunkSize + 1;
+    result = zeros(m, m);
+    for i = init : init + chunkSize - 1
+        % conv2 with 'valid' keeps only the fully overlapping positions, so
+        % each term has the same size as the accumulator.
+        result += conv2(a(:, :, i), k(:, :, i), 'valid');
+    end
+end
diff --git a/CNN/convn_valid.m b/CNN/convn_valid.m
@@ -0,0 +1,23 @@
+%CONVN_VALID  Sum of per-slice 2-D 'valid' convolutions, computed in parallel.
+%   Written as a conv2-based stand-in for convn(A, B, 'valid').
+%   A, B   - stacks sliced along dim 3; slice i of A is convolved with
+%            slice i of B (see convadd)
+%   result - m-by-m matrix, m = size(A, 1) - size(B, 1) + 1
+%   size(A, 3) must be a multiple of the worker count.
+
+function result = convn_valid(A, B)
+
+    m = size(A, 1) - size(B, 1) + 1;
+    numWorkers = 2;
+
+    % Stand-in output for a worker that failed; the error is echoed first.
+    % The parameter is not called "error" so the builtin stays reachable.
+    function retcode = eh(err)
+        a = err
+        retcode = zeros(25, 1);
+    end
+
+    % Each worker sums its own run of chunkSize slices. A fractional chunk
+    % would give the workers non-integer slice indices.
+    chunkSize = size(A, 3) / numWorkers;
+    if rem(chunkSize, 1) ~= 0
+        error('convn_valid: size(A, 3) must be divisible by numWorkers');
+    end
+    result = pararrayfun(numWorkers, @(i) convadd(A, B, m, i, chunkSize), 0 : numWorkers - 1, "ErrorHandler", @eh);
+
+    % The reduction below relies on the per-worker m-by-m results sitting
+    % side by side in result. Fold every block after the first into the
+    % first one (the old step of numWorkers*m only reached the second).
+    for j = m : m : (numWorkers - 1) * m
+        result(:, 1:m) += result(:, j+1 : j+m);
+    end
+
+    result = result(1:m, 1:m);
+end
diff --git a/CNN/eh.m b/CNN/eh.m
@@ -0,0 +1,5 @@
+% EH  Error handler: decides what goes into the output when the function fails.
+%   The error passed in is echoed, then a 26-by-1 column filled with 255 is
+%   returned in place of the failed call's result.
+function retcode = eh(err)
+    a = err
+    retcode = 255 * ones(26, 1);
+end
diff --git a/CNN/process_batch.m b/CNN/process_batch.m
@@ -0,0 +1,40 @@
+%PROCESS_BATCH  Train a worker-local CNN on one worker's run of mini-batches.
+%   x, y       - training inputs / targets; batches are x(:, :, idx), y(:, idx)
+%   kk         - sample order for this epoch; batch l uses
+%                kk((l-1)*opts.batchsize + 1 : l*opts.batchsize)
+%   global_net - network whose parameters seed the local one and are
+%                averaged with it when it is this worker's turn
+%   turn       - pid currently allowed to merge into global_net
+%   start      - number of batches that precede this worker's run
+%   numbatches - number of batches assigned to this worker
+%   pid        - zero-based id of this worker
+%   numWorkers - total number of workers
+%   opts       - options; opts.batchsize is read here and the struct is
+%                passed on to cnnapplygrads
+%   net        - the worker-local network after training. New, optional
+%                output: callers that ignore it behave as before.
+%
+%   NOTE(review): global_net and turn are ordinary (by-value) arguments, so
+%   the merge and the turn hand-over below are visible only inside this call.
+function net = process_batch(x, y, kk, global_net, turn, start, numbatches,  pid, numWorkers, opts)
+    net.p = 0;
+    net.rL = [];
+
+    % NOTE(review): hard-coded architecture; presumably it has to match
+    % global_net.layers - confirm, or derive it from global_net.
+    net.layers = {
+    struct('type', 'i') %input layer
+    struct('type', 'c', 'outputmaps', 6, 'kernelsize', 5) %convolution layer
+    struct('type', 's', 'scale', 2) %sub sampling layer
+    struct('type', 'c', 'outputmaps', 12, 'kernelsize', 5) %convolution layer
+    struct('type', 's', 'scale', 2) %subsampling layer
+    };
+
+    % Seed the local parameters (k, b, ffW, ffb) before the first forward
+    % pass. Previously the copy ran only after the first batch had already
+    % been pushed through a net without weights, and it then overwrote that
+    % batch's update.
+    % NOTE(review): cnncopy sizes its loops from global_net.layers{l}.a -
+    % confirm global_net carries activations at this point.
+    net = cnncopy(net, global_net);
+
+    % NOTE(review): the "- 3" leaves the last three batches of this worker's
+    % run untrained - confirm that is intended.
+    for l = start + 1 : start + numbatches - 3
+        idx = kk((l - 1) * opts.batchsize + 1 : l * opts.batchsize);
+
+        net = cnnff(net, x(:, :, idx));
+        net = cnnbp(net, y(:,    idx));
+        net = cnnapplygrads(net, opts);
+
+        % Exponentially smoothed loss history, started from the first loss.
+        if isempty(net.rL)
+            net.rL(1) = net.L;
+        end
+        net.rL(end + 1) = 0.99 * net.rL(end) + 0.01 * net.L;
+
+        %If we cannot update our results keep going to the next batch
+        if turn == pid
+            global_net = cnnavg(global_net, net);
+            net = cnncopy(net, global_net);
+            turn = mod((turn + 1), numWorkers);
+        end
+    end
+end
diff --git a/data/generate_mfcc.m b/data/generate_mfcc.m
@@ -0,0 +1,87 @@
+%this script crawls MFCC data from VoxForge for one language and stores it in
+%a .mat file. The language is chosen by pointing `endpoint` at one of the
+%*_endpoint URLs below (english, deutsch, italian); `filename` names the output.
+%this file assumes VoiceBox is in your Octave/Matlab's path.
+
+en_endpoint = 'http://www.repository.voxforge1.org/downloads/SpeechCorpus/Trunk/Audio/MFCC/8kHz_16bit/MFCC_0_D/';
+de_endpoint = 'http://www.repository.voxforge1.org/downloads/de/Trunk/Audio/MFCC/8kHz_16bit/MFCC_0_D/';
+it_endpoint = 'http://www.repository.voxforge1.org/downloads/it/Trunk/Audio/MFCC/8kHz_16bit/MFCC_0_D/';
+endpoint = it_endpoint;
+%maximum number of archives to crawl
+limit = 1500;
+
+flist = urlread(endpoint);
+
+%every match spans ">name.tgz<" in the listing; s and e hold the match start
+%and end positions. Single quotes keep the backslash in "\." for the regex
+%engine (a double-quoted string would consume it as an escape sequence).
+[s,e] = regexp(flist, '>([a-zA-Z0-9]*-[a-zA-Z0-9]*)+\.tgz<');
+%truncate the amount of data to be crawled, never past the number of matches
+n = min(limit, numel(s));
+s = s(1:n);
+e = e(1:n);
+
+confirm_recursive_rmdir(0)
+filename = "it.mat";
+
+function data = fetch_data(flist, endpoint, anfang, ende, id)
+    % FETCH_DATA  Download one .tgz archive and collect the MFCC frames in it.
+    %   flist        - listing text that anfang/ende index into
+    %   endpoint     - base URL the archive name is appended to
+    %   anfang, ende - positions of the characters just before and just after
+    %                  the archive name in flist
+    %   id           - task number; names the private scratch dir "temp<id>"
+    %   data         - 26-row matrix: one leading all-zero column, then d'
+    %                  for every .mfc file whose readhtk data type dt is 6
+    currfile = flist(anfang + 1 : ende - 1);
+    currdir = strcat("temp", int2str(id));
+    origdir = pwd();
+    data = zeros(26, 1);
+
+    % Whatever fails below, return to the starting directory and remove the
+    % scratch dir; otherwise a failed task would leave the worker inside
+    % temp<id> and leak the downloaded files.
+    unwind_protect
+        %at each step fetch a file from the corpus
+        mkdir(currdir); cd(currdir);
+        urlwrite(strcat(endpoint, currfile), currfile);
+
+        %Unzip the mfc files to temp dir and add them to the dataset.
+        %TODO: only working in Linux?.
+        %currfile(1:end-4) is the archive name without its ".tgz" suffix.
+        untar(currfile); cd(currfile(1:end-4)); cd mfc;
+        mfcs = ls("*.mfc");
+        for j = 1 : size(mfcs, 1)
+            [d, fp, dt, tc, t] = readhtk(strtrim(mfcs(j, :)));
+            %check if this file contains mfccs.
+            if dt == 6
+                data = [data, d'];
+            end
+        end
+    unwind_protect_cleanup
+        cd(origdir);
+        if exist(currdir, "dir")
+            rmdir(currdir, "s");
+        end
+    end_unwind_protect
+end
+
+%here goes what to put in the output when the function fails:
+%the error is echoed and a 26-by-1 column of 255s stands in for the result.
+%The parameter is not called "error" so the builtin of that name stays usable.
+function retcode = eh(err)
+    a = err
+    retcode = zeros(26,1).+255;
+end
+
+
+%fetch every archive in parallel. The anonymous function curries the shared
+%arguments (flist, endpoint); s, e and 1:size(s,2) supply the per-task
+%start position, end position and task id. Failed tasks yield eh's output.
+numWorkers = 30;
+mfccs = pararrayfun(numWorkers, ...
+                    @(anfang, ende, id) fetch_data(flist, endpoint, anfang, ende, id), ...
+                    s, e, 1:size(s,2), ...
+                    "ErrorHandler", @eh);
+
+read_size = size(mfccs)
+%save takes the NAME of the variable to store, hence the string "mfccs".
+save("-mat4-binary", filename, "mfccs");
diff --git a/data/readhtk.m b/data/readhtk.m
@@ -0,0 +1,15 @@
+function [d,fp,dt,tc,t]=readhtk(file)
+% READHTK placeholder. The real routine belongs to VOICEBOX,
+% a MATLAB toolbox for speech processing by Mike Brookes,
+% and is not shipped here due to licensing issues. See
+% http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html
+% for more about the VOICEBOX toolbox.
+% This stub only raises an error that explains where to get
+% the toolbox; none of its outputs is ever assigned.
+% Remove this file once the VOICEBOX toolbox is installed.
+
+
+msg = sprintf('To use this routine you will have to download \nand install the VOICEBOX toolbox from: \nhttp://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html\nPlease remember to remove the placeholder file once you install the VOICEBOX toolbox.');
+error(msg);
+
+
+% EOF
diff --git a/tests/test_example_CNN.m b/tests/test_example_CNN.m
@@ -9,25 +9,29 @@
 %% ex1 Train a 6c-2s-12c-2s Convolutional neural network 
 %will run 1 epoch in about 200 second and get around 11% error. 
 %With 100 epochs you'll get around 1.2% error
+% The guard keeps rng away from Octave. It is written with "~" because "!"
+% is Octave-only syntax: with "!" the very interpreter the guard lets
+% through could not parse this file.
+if ~isOctave()
 rng(0)
+end
+
 cnn.layers = {
     struct('type', 'i') %input layer
     struct('type', 'c', 'outputmaps', 6, 'kernelsize', 5) %convolution layer
     struct('type', 's', 'scale', 2) %sub sampling layer
     struct('type', 'c', 'outputmaps', 12, 'kernelsize', 5) %convolution layer
     struct('type', 's', 'scale', 2) %subsampling layer
 };
-cnn = cnnsetup(cnn, train_x, train_y);
+
 
 opts.alpha = 1;
-opts.batchsize = 50;
+opts.batchsize = 250;
 opts.numepochs = 1;
 
+cnn = cnnsetup(cnn, train_x, train_y);
 cnn = cnntrain(cnn, train_x, train_y, opts);
 
 [er, bad] = cnntest(cnn, test_x, test_y);
-
+% echo the error rate once before asserting on it
+er
 %plot mean squared error
-figure; plot(cnn.rL);
-
+%figure; plot(cnn.rL);
 assert(er<0.12, 'Too big error');