|
| 1 | +load=ndlMacroDefine # names the section below that holds the macro definitions |
| 2 | +run=ndlCreateNetwork_LSTMP_c1024_p256_x3 # names the section below that builds the network from those macros |
| 3 | + |
| 4 | +ndlMacroDefine=[ |
| 5 | + # Macro definitions |
| 6 | + MeanVarNorm(x)=[ # macro: normalize x using statistics computed by the Mean and InvStdDev nodes |
| 7 | + xMean = Mean(x); # output of the Mean node applied to x |
| 8 | + xStdDev = InvStdDev(x) # named xStdDev but produced by the InvStdDev node |
| 9 | + xNorm=PerDimMeanVarNormalization(x,xMean,xStdDev) # presumably the macro result since it is assigned last - confirm against the NDL macro rules |
| 10 | + ] |
| 11 | + |
| 12 | + LogPrior(labels) # macro: Log applied to the Mean of the label input |
| 13 | + { |
| 14 | + Prior=Mean(labels) # output of the Mean node applied to labels |
| 15 | + LogPrior=Log(Prior) # shares the macro name and is assigned last; presumably the value the macro yields - confirm against the NDL macro rules |
| 16 | + } |
| 17 | + |
| 18 | + FastKernelNN(inputDim, outputDim, cellDim, inputx, cellDimX2, cellDimX3, cellDimX4) # macro: gated recurrent layer whose cell reads its own delayed value through LCPastValue |
| 19 | + { |
| 20 | + wx = Parameter(cellDim, inputDim, init=uniform, initValueScale=1); # weights of the candidate cell input |
| 21 | + b = Parameter(cellDim, init=fixedValue, value=0.0); # bias of the candidate cell input, starts at zero |
| 22 | + constOne = Parameter(cellDim, init=fixedValue, value=1.0,learningRateMultiplier=0.0); # vector of ones; learningRateMultiplier 0.0 so it is presumably never updated |
| 23 | + # gwx and gb parameterize the cell gate called lambda below |
| 24 | + gwx = Parameter(cellDim, inputDim, init=uniform, initValueScale=1); |
| 25 | + gb = Parameter(cellDim, init=fixedValue, value=0.0); |
| 26 | + # hwx and hb parameterize the output gate called hgate below |
| 27 | + hwx = Parameter(cellDim, inputDim, init=uniform, initValueScale=1); |
| 28 | + hb = Parameter(cellDim, init=fixedValue, value=0.0); |
| 29 | + # three affine maps of inputx follow; none of them reads the previous cell or output |
| 30 | + wxx = Times(wx, inputx); |
| 31 | + wxxpb = Plus(wxx, b); # candidate cell input |
| 32 | + |
| 33 | + gwxx = Times(gwx, inputx); |
| 34 | + gwxxpb = Plus(gwxx, gb); # pre-activation of lambda |
| 35 | + |
| 36 | + hwxx = Times(hwx, inputx); |
| 37 | + hwxxpb = Plus(hwxx, hb); # pre-activation of hgate |
| 38 | + # note: outputDim, cellDimX2, cellDimX3 and cellDimX4 are accepted but never referenced in this body |
| 39 | + # recurrence: cell is lambda times the delayed cell plus one minus lambda times wxxpb |
| 40 | + # LCPastValue and its latencyStep argument are not defined in this file - TODO confirm their semantics |
| 41 | + lambda = Sigmoid(gwxxpb) |
| 42 | + dcell = LCPastValue(cellDim, cell, timeStep=1, latencyStep=21) # presumably the cell delayed by one step - confirm |
| 43 | + cell = Plus (ElementTimes(lambda, dcell) , ElementTimes(Minus(constOne, lambda), wxxpb)) |
| 44 | + # output mixes a separate linear map of inputx with tanh of the cell, weighted by hgate |
| 45 | + wx2 = Parameter(cellDim, inputDim, init=uniform, initValueScale=1); # weights of the direct input path |
| 46 | + hgate = Sigmoid(hwxxpb) |
| 47 | + fhgate = Minus(constOne, hgate) # one minus hgate |
| 48 | + output = Plus(ElementTimes(fhgate, Times(wx2,inputx)), ElementTimes(hgate, tanh(cell))) # has cellDim rows; assigned last, presumably the macro result - confirm |
| 49 | + |
| 50 | + } |
| 51 | + |
| 52 | + |
| 53 | + BFastKernelNN(inputDim, outputDim, cellDim, inputx, cellDimX2, cellDimX3, cellDimX4) # macro: same gated layer as FastKernelNN but the cell reads its own value through FutureValue |
| 54 | + { |
| 55 | + wx = Parameter(cellDim, inputDim, init=uniform, initValueScale=1); # weights of the candidate cell input |
| 56 | + b = Parameter(cellDim, init=fixedValue, value=0.0); # bias of the candidate cell input, starts at zero |
| 57 | + constOne = Parameter(cellDim, init=fixedValue, value=1.0,learningRateMultiplier=0.0); # vector of ones; learningRateMultiplier 0.0 so it is presumably never updated |
| 58 | + # gwx and gb parameterize the cell gate called lambda below |
| 59 | + gwx = Parameter(cellDim, inputDim, init=uniform, initValueScale=1); |
| 60 | + gb = Parameter(cellDim, init=fixedValue, value=0.0); |
| 61 | + # hwx and hb parameterize the output gate called hgate below |
| 62 | + hwx = Parameter(cellDim, inputDim, init=uniform, initValueScale=1); |
| 63 | + hb = Parameter(cellDim, init=fixedValue, value=0.0); |
| 64 | + # three affine maps of inputx follow; none of them reads the neighbouring cell or output |
| 65 | + wxx = Times(wx, inputx); |
| 66 | + wxxpb = Plus(wxx, b); # candidate cell input |
| 67 | + |
| 68 | + gwxx = Times(gwx, inputx); |
| 69 | + gwxxpb = Plus(gwxx, gb); # pre-activation of lambda |
| 70 | + |
| 71 | + hwxx = Times(hwx, inputx); |
| 72 | + hwxxpb = Plus(hwxx, hb); # pre-activation of hgate |
| 73 | + # recurrence: cell is lambda times the FutureValue of cell plus one minus lambda times wxxpb; outputDim and cellDimX2 to cellDimX4 are never referenced |
| 74 | + lambda = Sigmoid(gwxxpb) |
| 75 | + dcell = FutureValue(cellDim, cell, timeStep=1) # presumably the cell of the following frame, making this the backward direction - confirm |
| 76 | + cell = Plus (ElementTimes(lambda, dcell) , ElementTimes(Minus(constOne, lambda), wxxpb)) |
| 77 | + # output mixes a separate linear map of inputx with tanh of the cell, weighted by hgate |
| 78 | + wx2 = Parameter(cellDim, inputDim, init=uniform, initValueScale=1); # weights of the direct input path |
| 79 | + hgate = Sigmoid(hwxxpb) |
| 80 | + fhgate = Minus(constOne, hgate) # one minus hgate |
| 81 | + output = Plus(ElementTimes(fhgate, Times(wx2,inputx)), ElementTimes(hgate, tanh(cell))) # has cellDim rows; assigned last, presumably the macro result - confirm |
| 82 | + |
| 83 | + } |
| 84 | + |
| 85 | +] |
| 86 | + |
| 87 | +ndlCreateNetwork_LSTMP_c1024_p256_x3=[ |
| 88 | + # Builds the network: two row slices of the input, 12 stacked FastKernelNN and BFastKernelNN layer pairs, an affine output layer, then criterion, error and scaled log likelihood nodes |
| 89 | + #define basic i/o |
| 90 | + baseFeatDim=$baseFeatDim$ |
| 91 | + RowSliceStart1=$RowSliceStart1$ |
| 92 | + RowSliceStart2=$RowSliceStart2$ |
| 93 | + FeatDim=$featDim$ |
| 94 | + labelDim=$labelDim$ |
| 95 | + cellDim=600 # the section name says c1024 and x3, but the values actually used here are 600 cells and 12 layers; the name is kept because the run statement at the top of the file refers to it |
| 96 | + cellDimX2=2048 #If BrainScript is used we don't need to pass in following three values |
| 97 | + cellDimX3=3072 # cellDimX2 to cellDimX4 are passed to the macros but not referenced inside either macro body |
| 98 | + cellDimX4=4096 |
| 99 | + hiddenDim=1200 # equals 2 x cellDim: each layer stacks a forward and a backward output of cellDim rows, and the next layer uses hiddenDim as its input dimension |
| 100 | + |
| 101 | + features=Input(FeatDim, tag=feature) |
| 102 | + labels=Input(labelDim, tag=label) |
| 103 | + # normalization of the whole feature vector is disabled; the two slices below are normalized separately |
| 104 | + #featNorm = MeanVarNorm(features) |
| 105 | + feashift1=RowSlice(RowSliceStart1, baseFeatDim, features); # shift 5 frames right (x_{t+5} -> x_{t} ) |
| 106 | + feashift2=RowSlice(RowSliceStart2, baseFeatDim, features); # second slice, starting at RowSliceStart2, feeds the backward layer 1; the earlier comment here was a copy of the line above - TODO confirm the actual shift |
| 107 | + |
| 108 | + # normalize each slice independently with the MeanVarNorm macro |
| 109 | + featNorm1 = MeanVarNorm(feashift1) |
| 110 | + featNorm2 = MeanVarNorm(feashift2) |
| 111 | + |
| 112 | + |
| 113 | + # layer 1: forward macro reads featNorm1, backward macro reads featNorm2 |
| 114 | + FLSTMoutput1 = FastKernelNN(baseFeatDim, hiddenDim, cellDim, featNorm1, cellDimX2, cellDimX3, cellDimX4); |
| 115 | + BLSTMoutput1 = BFastKernelNN(baseFeatDim, hiddenDim, cellDim, featNorm2, cellDimX2, cellDimX3, cellDimX4); |
| 116 | + LSTMoutput1 = RowStack(FLSTMoutput1, BLSTMoutput1) |
| 117 | + |
| 118 | + # layer 2: from here on both directions read the stacked output of the previous layer |
| 119 | + FLSTMoutput2 = FastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput1, cellDimX2, cellDimX3, cellDimX4); |
| 120 | + BLSTMoutput2 = BFastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput1, cellDimX2, cellDimX3, cellDimX4); |
| 121 | + LSTMoutput2 = RowStack(FLSTMoutput2, BLSTMoutput2) |
| 122 | + |
| 123 | + # layer 3 |
| 124 | + FLSTMoutput3 = FastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput2, cellDimX2, cellDimX3, cellDimX4); |
| 125 | + BLSTMoutput3 = BFastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput2, cellDimX2, cellDimX3, cellDimX4); |
| 126 | + LSTMoutput3 = RowStack(FLSTMoutput3, BLSTMoutput3) |
| 127 | + # layer 4 |
| 128 | + FLSTMoutput4 = FastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput3, cellDimX2, cellDimX3, cellDimX4); |
| 129 | + BLSTMoutput4 = BFastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput3, cellDimX2, cellDimX3, cellDimX4); |
| 130 | + LSTMoutput4 = RowStack(FLSTMoutput4, BLSTMoutput4) |
| 131 | + # layer 5 |
| 132 | + FLSTMoutput5 = FastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput4, cellDimX2, cellDimX3, cellDimX4); |
| 133 | + BLSTMoutput5 = BFastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput4, cellDimX2, cellDimX3, cellDimX4); |
| 134 | + LSTMoutput5 = RowStack(FLSTMoutput5, BLSTMoutput5) |
| 135 | + # layer 6 |
| 136 | + FLSTMoutput6 = FastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput5, cellDimX2, cellDimX3, cellDimX4); |
| 137 | + BLSTMoutput6 = BFastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput5, cellDimX2, cellDimX3, cellDimX4); |
| 138 | + LSTMoutput6 = RowStack(FLSTMoutput6, BLSTMoutput6) |
| 139 | + # layer 7 |
| 140 | + FLSTMoutput7 = FastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput6, cellDimX2, cellDimX3, cellDimX4); |
| 141 | + BLSTMoutput7 = BFastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput6, cellDimX2, cellDimX3, cellDimX4); |
| 142 | + LSTMoutput7 = RowStack(FLSTMoutput7, BLSTMoutput7) |
| 143 | + # layer 8 |
| 144 | + FLSTMoutput8 = FastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput7, cellDimX2, cellDimX3, cellDimX4); |
| 145 | + BLSTMoutput8 = BFastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput7, cellDimX2, cellDimX3, cellDimX4); |
| 146 | + LSTMoutput8 = RowStack(FLSTMoutput8, BLSTMoutput8) |
| 147 | + # layer 9 |
| 148 | + FLSTMoutput9 = FastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput8, cellDimX2, cellDimX3, cellDimX4); |
| 149 | + BLSTMoutput9 = BFastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput8, cellDimX2, cellDimX3, cellDimX4); |
| 150 | + LSTMoutput9 = RowStack(FLSTMoutput9, BLSTMoutput9) |
| 151 | + # layer 10 |
| 152 | + FLSTMoutput10 = FastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput9, cellDimX2, cellDimX3, cellDimX4); |
| 153 | + BLSTMoutput10 = BFastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput9, cellDimX2, cellDimX3, cellDimX4); |
| 154 | + LSTMoutput10 = RowStack(FLSTMoutput10, BLSTMoutput10) |
| 155 | + # layer 11 |
| 156 | + FLSTMoutput11 = FastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput10, cellDimX2, cellDimX3, cellDimX4); |
| 157 | + BLSTMoutput11 = BFastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput10, cellDimX2, cellDimX3, cellDimX4); |
| 158 | + LSTMoutput11 = RowStack(FLSTMoutput11, BLSTMoutput11) |
| 159 | + # layer 12 |
| 160 | + FLSTMoutput12 = FastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput11, cellDimX2, cellDimX3, cellDimX4); |
| 161 | + BLSTMoutput12 = BFastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput11, cellDimX2, cellDimX3, cellDimX4); |
| 162 | + LSTMoutput12 = RowStack(FLSTMoutput12, BLSTMoutput12) |
| 163 | + |
| 164 | + # output layer: affine map from the stacked layer 12 output to labelDim rows |
| 165 | + W = Parameter(labelDim, hiddenDim, init=uniform, initValueScale=1); |
| 166 | + b = Parameter(labelDim, init=fixedValue, value=0); |
| 167 | + LSTMoutputW = Plus(Times(W, LSTMoutput12), b); |
| 168 | + # training criterion and evaluation error, both computed from the labels and the affine output |
| 169 | + cr = CrossEntropyWithSoftmax(labels, LSTMoutputW,tag=Criteria); |
| 170 | + Err = ErrorPrediction(labels,LSTMoutputW,tag=Eval); |
| 171 | + # output node: the affine output minus the result of the LogPrior macro on the labels |
| 172 | + logPrior = LogPrior(labels) |
| 173 | + ScaledLogLikelihood=Minus(LSTMoutputW,logPrior,tag=Output) |
| 174 | + |
| 175 | +] |
| 176 | + |
0 commit comments