Commit 96a6061

Merge pull request deeplearning4j#889 from deeplearning4j/canovatravis

clean up optimization and deprecations

2 parents: 1fe2155 + b9e5507

55 files changed: +620 additions, -396 deletions


.travis.yml

Lines changed: 3 additions & 1 deletion

@@ -10,7 +10,9 @@ jdk:
 # for running tests on Travis CI container infrastructure for faster builds
 sudo: true
 
-
+env:
+  global:
+    JAVA_OPTS=-Xmx2g
 
 before_install:
   - sudo apt-get install build-essential git

deeplearning4j-cli/deeplearning4j-cli-api/src/main/java/org/deeplearning4j/cli/subcommands/Train.java

Lines changed: 2 additions & 2 deletions

@@ -170,7 +170,7 @@ public void execLocal() {
     MultiLayerConfiguration conf = MultiLayerConfiguration.fromJson(FileUtils.readFileToString(new File(modelPath)));
     FeedForwardLayer outputLayer = (FeedForwardLayer) conf.getConf(conf.getConfs().size() - 1).getLayer();
 
-    DataSetIterator iter = new RecordReaderDataSetIterator( reader , conf.getConf(0).getBatchSize(),-1, outputLayer.getNOut());
+    DataSetIterator iter = new RecordReaderDataSetIterator( reader ,1,-1, outputLayer.getNOut());
 
     MultiLayerNetwork network = new MultiLayerNetwork(conf);
     if(verbose) {

@@ -195,7 +195,7 @@ public void execLocal() {
     NeuralNetConfiguration conf = NeuralNetConfiguration.fromJson(FileUtils.readFileToString(new File(modelPath)));
     LayerFactory factory = LayerFactories.getFactory(conf);
     Layer l = factory.create(conf);
-    DataSetIterator iter = new RecordReaderDataSetIterator( reader , conf.getBatchSize());
+    DataSetIterator iter = new RecordReaderDataSetIterator( reader , 1);
     while(iter.hasNext()) {
         l.fit(iter.next().getFeatureMatrix());
     }
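Note: with conf.getBatchSize() gone, the batch size is passed straight to the data-set iterator. A minimal sketch of the new pattern, assuming the Canova-era package layout; the CSV reader, file name, and outcome count are illustrative, not taken from the commit:

import java.io.File;
import org.canova.api.records.reader.RecordReader;
import org.canova.api.records.reader.impl.CSVRecordReader;
import org.canova.api.split.FileSplit;
import org.deeplearning4j.datasets.canova.RecordReaderDataSetIterator;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;

public class IteratorBatchSizeSketch {
    public static void main(String[] args) throws Exception {
        RecordReader reader = new CSVRecordReader();
        reader.initialize(new FileSplit(new File("train.csv"))); // illustrative input

        // Batch size is now an explicit iterator argument rather than a value
        // read back from NeuralNetConfiguration.
        int batchSize = 1;    // the diff hard-codes 1 at both call sites
        int labelIndex = -1;  // -1: take the label from the record itself
        int numOutcomes = 3;  // illustrative; Train.java uses outputLayer.getNOut()

        DataSetIterator iter = new RecordReaderDataSetIterator(reader, batchSize, labelIndex, numOutcomes);
    }
}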

deeplearning4j-cli/deeplearning4j-cli-api/src/test/java/org/deeplearning4j/cli/TrainMultiLayerConfigTest.java

Lines changed: 0 additions & 1 deletion

@@ -46,7 +46,6 @@ public void testMultiLayerConfig() throws Exception {
     Model testModelFlag = new Model();
     MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
             .iterations(100)
-            .batchSize(10)
             .learningRate(1e-1f).momentum(0.9).regularization(true).l2(2e-4)
             .optimizationAlgo(OptimizationAlgorithm.LBFGS).constrainGradientToUnitNorm(true)
             .list(2)

deeplearning4j-core/src/main/java/org/deeplearning4j/gradientcheck/GradientCheckUtil.java

Lines changed: 2 additions & 2 deletions

@@ -60,13 +60,13 @@ public static boolean checkGradients( MultiLayerNetwork mln, double epsilon, dou
 
     if(useUpdater) {
         Updater updater = UpdaterCreator.getUpdater(mln);
-        updater.update(mln, gradAndScore.getFirst(), 0);
+        updater.update(mln, gradAndScore.getFirst(), 0, mln.batchSize());
     }
 
     INDArray gradientToCheck = gradAndScore.getFirst().gradient();
     INDArray originalParams = mln.params();
 
-    int nParams = mln.numParams();
+    int nParams = originalParams.length();
 
     int totalNFailures = 0;
     double maxError = 0.0;

deeplearning4j-core/src/main/java/org/deeplearning4j/nn/api/Updater.java

Lines changed: 3 additions & 2 deletions

@@ -12,10 +12,11 @@
 public interface Updater extends Serializable {
     /**
      * Updater: updates the model
+     *
      * @param layer
      * @param gradient
-     * @param iteration
+     * @param iteration
      */
-    void update(Layer layer,Gradient gradient,int iteration);
+    void update(Layer layer, Gradient gradient, int iteration, int miniBatchSize);
 
 }
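Note: existing Updater implementations must add the new parameter. A hypothetical no-op implementation, shown only to illustrate the four-argument contract; a real updater would typically use miniBatchSize to average gradients over the examples that produced them:

import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.Updater;
import org.deeplearning4j.nn.gradient.Gradient;

// Hypothetical stub: demonstrates the new signature, performs no update.
public class NoOpUpdater implements Updater {
    @Override
    public void update(Layer layer, Gradient gradient, int iteration, int miniBatchSize) {
        // A real implementation would adjust 'gradient' in place here,
        // e.g. scaling accumulated gradients by 1.0 / miniBatchSize.
    }
}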

deeplearning4j-core/src/main/java/org/deeplearning4j/nn/conf/InputPreProcessor.java

Lines changed: 5 additions & 3 deletions

@@ -54,16 +54,18 @@ public interface InputPreProcessor extends Serializable, Cloneable {
     /**
      * Pre preProcess input/activations for a multi layer network
      * @param input the input to pre preProcess
+     * @param miniBatchSize
      * @return the processed input
      */
-    INDArray preProcess(INDArray input, Layer layer);
+    INDArray preProcess(INDArray input, int miniBatchSize);
 
     /**Reverse the preProcess during backprop. Process Gradient/epsilons before
      * passing them to the layer below.
-     * @param output which is a pair of the gradient and epsilon
+     * @param output which is a pair of the gradient and epsilon
+     * @param miniBatchSize
      * @return the reverse of the pre preProcess step (if any)
      */
-    INDArray backprop(INDArray output, Layer layer);
+    INDArray backprop(INDArray output, int miniBatchSize);
 
     InputPreProcessor clone();
 }
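Note: custom preprocessors now receive the minibatch size directly instead of a Layer to query. A hypothetical pass-through implementation of the updated interface (the class itself is illustrative, not part of this commit):

import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.nd4j.linalg.api.ndarray.INDArray;

// Hypothetical identity preprocessor: shows the new method signatures only.
public class IdentityPreProcessor implements InputPreProcessor {
    @Override
    public INDArray preProcess(INDArray input, int miniBatchSize) {
        return input; // forward pass unchanged
    }

    @Override
    public INDArray backprop(INDArray output, int miniBatchSize) {
        return output; // backward pass unchanged
    }

    @Override
    public IdentityPreProcessor clone() {
        return new IdentityPreProcessor();
    }
}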

deeplearning4j-core/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java

Lines changed: 1 addition & 11 deletions

@@ -55,7 +55,6 @@ public class NeuralNetConfiguration implements Serializable,Cloneable {
 
     protected Layer layer;
     //batch size: primarily used for conv nets. Will be reinforced if set.
-    protected int batchSize = 1;
     protected boolean miniBatch = true;
     protected int numIterations = 5;
     //number of line search iterations

@@ -65,8 +64,6 @@ public class NeuralNetConfiguration implements Serializable,Cloneable {
     //gradient keys used for ensuring order when getting and setting the gradient
     protected List<String> variables = new ArrayList<>();
     //whether to constrain the gradient to unit norm or not
-    @Deprecated
-    protected boolean constrainGradientToUnitNorm = false;
     //adadelta - weight for how much to consider previous history
     protected StepFunction stepFunction;
     protected boolean useRegularization = false;

@@ -285,7 +282,6 @@ public static class Builder implements Cloneable {
        private double adamMeanDecay = 0.9;
        private double adamVarDecay = 0.999;
        private Layer layer;
-       private int batchSize = 1;
        private boolean miniBatch = true;
        private int numIterations = 5;
        private int maxNumLineSearchIterations = 5;

@@ -352,11 +348,7 @@ public Builder maxNumLineSearchIterations(int maxNumLineSearchIterations) {
            return this;
        }
 
-       /** Minibatch size. Number of examples in a batch.*/
-       public Builder batchSize(int batchSize) {
-           this.batchSize = batchSize;
-           return this;
-       }
+
 
        /** Layer class. */
        public Builder layer(Layer layer) {

@@ -590,13 +582,11 @@ public NeuralNetConfiguration build() {
 
        conf.minimize = minimize;
        conf.maxNumLineSearchIterations = maxNumLineSearchIterations;
-       conf.batchSize = batchSize;
        conf.layer = layer;
        conf.numIterations = numIterations;
        conf.useRegularization = useRegularization;
        conf.useSchedules = useSchedules;
        conf.optimizationAlgo = optimizationAlgo;
-       conf.constrainGradientToUnitNorm = constrainGradientToUnitNorm;
        conf.seed = seed;
        conf.timeSeriesLength = timeSeriesLength;
        conf.stepFunction = stepFunction;
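Note: migrating an existing configuration means deleting the .batchSize(...) call; batching is controlled by whichever DataSetIterator feeds the network (see Train.java above). A hedged sketch of the builder chain from the test, minus the removed call, with layer setup elided:

import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;

// .batchSize(10) is simply dropped from the chain; the value moves to the
// data iterator instead. Layers are elided here for brevity.
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .iterations(100)
        .learningRate(1e-1f).momentum(0.9).regularization(true).l2(2e-4)
        .optimizationAlgo(OptimizationAlgorithm.LBFGS)
        .list(2)
        // ... hidden and output layers as before ...
        .build();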

deeplearning4j-core/src/main/java/org/deeplearning4j/nn/conf/preprocessor/BinomialSamplingPreProcessor.java

Lines changed: 2 additions & 3 deletions

@@ -21,7 +21,6 @@
 
 import lombok.Data;
 
-import org.deeplearning4j.nn.api.Layer;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.factory.Nd4j;
 

@@ -33,13 +32,13 @@
 public class BinomialSamplingPreProcessor extends BaseInputPreProcessor {
 
     @Override
-    public INDArray preProcess(INDArray input, Layer layer) {
+    public INDArray preProcess(INDArray input, int miniBatchSize) {
         return Nd4j.getDistributions().createBinomial(1, input).sample(input.shape());
     }
 
 
     @Override
-    public INDArray backprop(INDArray output, Layer layer) {
+    public INDArray backprop(INDArray output, int miniBatchSize) {
         return output; //No op?
     }
 }

deeplearning4j-core/src/main/java/org/deeplearning4j/nn/conf/preprocessor/CnnToFeedForwardPreProcessor.java

Lines changed: 2 additions & 3 deletions

@@ -23,7 +23,6 @@
 
 import lombok.Data;
 
-import org.deeplearning4j.nn.api.Layer;
 import org.deeplearning4j.nn.conf.InputPreProcessor;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.api.shape.Shape;

@@ -78,7 +77,7 @@ public CnnToFeedForwardPreProcessor(){}
 
     @Override
     // return 2 dimensions
-    public INDArray preProcess(INDArray input, Layer layer) {
+    public INDArray preProcess(INDArray input, int miniBatchSize) {
         int[] otherOutputs = null;
 
         //this.inputHeight = input.size(-2);

@@ -104,7 +103,7 @@ else if(input.shape().length == 3) {
     }
 
     @Override
-    public INDArray backprop(INDArray output, Layer layer){
+    public INDArray backprop(INDArray output, int miniBatchSize){
         if (output.shape().length == 4)
             return output;
         if (output.columns() != inputWidth * inputHeight * numChannels)

deeplearning4j-core/src/main/java/org/deeplearning4j/nn/conf/preprocessor/CnnToRnnPreProcessor.java

Lines changed: 2 additions & 5 deletions

@@ -2,9 +2,7 @@
 
 import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.annotation.JsonProperty;
-import lombok.AllArgsConstructor;
 import lombok.Data;
-import org.deeplearning4j.nn.api.Layer;
 import org.deeplearning4j.nn.conf.InputPreProcessor;
 import org.nd4j.linalg.api.ndarray.INDArray;
 

@@ -41,20 +39,19 @@ public CnnToRnnPreProcessor(@JsonProperty("inputHeight") int inputHeight,
     }
 
     @Override
-    public INDArray preProcess(INDArray input, Layer layer) {
+    public INDArray preProcess(INDArray input, int miniBatchSize) {
         if(input.rank() != 4) throw new IllegalArgumentException("Invalid input: expect CNN activations with rank 4 (received input with shape "
                 + Arrays.toString(input.shape())+")");
         //Input: 4d activations (CNN)
         //Output: 3d activations (RNN)
 
         int[] shape = input.shape(); //[timeSeriesLength*miniBatchSize, numChannels, inputHeight, inputWidth]
-        int miniBatchSize = layer.getInputMiniBatchSize();
         INDArray reshaped = input.reshape(miniBatchSize,shape[0]/miniBatchSize,product);
         return reshaped.permute(0,2,1);
     }
 
     @Override
-    public INDArray backprop(INDArray output, Layer layer) {
+    public INDArray backprop(INDArray output, int miniBatchSize) {
         int[] shape = output.shape();
         INDArray output2d;
         if(shape[0]==1){
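Note: with miniBatchSize passed in explicitly, the reshape arithmetic is easy to check in isolation. A small sketch with illustrative dimensions: 8 examples, 10 time steps, 3 channels of 4x5 feature maps, so product = 3*4*5 = 60:

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

// input layout: [timeSeriesLength * miniBatchSize, channels, height, width]
int miniBatchSize = 8;
INDArray input = Nd4j.zeros(10 * miniBatchSize, 3, 4, 5);                 // [80, 3, 4, 5]
INDArray reshaped = input.reshape(miniBatchSize, 80 / miniBatchSize, 60); // [8, 10, 60]
INDArray rnnInput = reshaped.permute(0, 2, 1);                            // [8, 60, 10]
// i.e. [miniBatchSize, features, timeSeriesLength], the 3d RNN layout.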
