Skip to content

Commit d7d131e

Browse files
committed
Adding speech example.
1 parent 29be036 commit d7d131e

13 files changed

+1817
-0
lines changed

speech/BLSTM_5L.ndl

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
# Entry points of this file: load names the section that holds the macro
# definitions (ndlMacroDefine, below) and run names the section that builds
# the network (ndlCreateNetwork_LSTMP_c1024_p256_x3, further down).
load=ndlMacroDefine
2+
run=ndlCreateNetwork_LSTMP_c1024_p256_x3
3+
4+
ndlMacroDefine=[
5+
# Macro definitions
6+
# MeanVarNorm: feeds x, Mean(x) and InvStdDev(x) into PerDimMeanVarNormalization.
#   x - node to normalize
# Note that xStdDev holds the result of InvStdDev, not a plain standard deviation.
# The final assignment, xNorm, is presumably the value of the macro - confirm
# against the NDL macro rules.
MeanVarNorm(x)=[
7+
xMean = Mean(x);
8+
xStdDev = InvStdDev(x)
9+
xNorm=PerDimMeanVarNormalization(x,xMean,xStdDev)
10+
]
11+
12+
# LogPrior: Log of Mean(labels).
#   labels - label input node
# The result is assigned to a variable carrying the same name as the macro.
LogPrior(labels)
13+
{
14+
Prior=Mean(labels)
15+
LogPrior=Log(Prior)
16+
}
17+
18+
19+
# LSTMPComponentForward: one LSTM layer with diagonal cell-to-gate weights whose
# recurrence reads delayed copies of its own output and cell state via LCPastValue.
#   inputDim  - column count of the input weight wx
#   outputDim - column count of the recurrent weight Wh and size given to the delayed output
#   cellDim   - row count of every gate slice and of the Wci, Wcf, Wco vectors
#   inputx    - input node that is multiplied by wx
#   cellDimX2 - row offset of the third slice (G3)
#   cellDimX3 - row offset of the fourth slice (G4)
#   cellDimX4 - row count of wx, b and Wh, i.e. all four slices stacked
# The final assignment, output, is presumably the value of the macro - confirm
# against the NDL macro rules.
LSTMPComponentForward(inputDim, outputDim, cellDim, inputx, cellDimX2, cellDimX3, cellDimX4)
20+
{
21+
# Stacked input weight, bias and recurrent weight covering all four slices.
wx = Parameter(cellDimX4, inputDim, init=uniform, initValueScale=1);
22+
b = Parameter(cellDimX4, init=fixedValue, value=0.0);
23+
Wh = Parameter(cellDimX4, outputDim, init=uniform, initValueScale=1);
24+
25+
# Per-cell vectors applied to the cell state through DiagTimes below.
Wci = Parameter(cellDim, init=uniform, initValueScale=1);
26+
Wcf = Parameter(cellDim, init=uniform, initValueScale=1);
27+
Wco = Parameter(cellDim, init=uniform, initValueScale=1);
28+
29+
# Delayed copies of output and ct, both defined further down in this macro,
# which closes the recurrent loop.
# NOTE(review): the meaning of latencyStep 21 is not visible in this file -
# confirm against the LCPastValue node documentation.
dh = LCPastValue(outputDim, output, timeStep=1, latencyStep=21);
30+
dc = LCPastValue(cellDim, ct, timeStep=1, latencyStep=21);
31+
32+
wxx = Times(wx, inputx);
33+
wxxpb = Plus(wxx, b);
34+
35+
# NOTE(review): the parameter above is declared as Wh but referenced here as wh.
# This only resolves if NDL name lookup is case-insensitive - confirm.
whh = Times(wh, dh);
36+
37+
# Sum of the input path, the bias and the recurrent path.
wxxpbpwhh = Plus(wxxpb,whh)
38+
39+
# Cut the stacked sum into four slices of cellDim rows each.
G1 = RowSlice(0, cellDim, wxxpbpwhh)
40+
G2 = RowSlice(cellDim, cellDim, wxxpbpwhh)
41+
G3 = RowSlice(cellDimX2, cellDim, wxxpbpwhh);
42+
G4 = RowSlice(cellDimX3, cellDim, wxxpbpwhh);
43+
44+
# it: sigmoid gate built from slice G1 plus Wci applied to the delayed cell state.
Wcidc = DiagTimes(Wci, dc);
45+
it = Sigmoid (Plus ( G1, Wcidc));
46+
47+
# bit: gate it applied to Tanh of slice G2.
bit = ElementTimes(it, Tanh( G2 ));
48+
49+
# ft: sigmoid gate built from slice G3 plus Wcf applied to the delayed cell state.
Wcfdc = DiagTimes(Wcf, dc);
50+
ft = Sigmoid( Plus (G3, Wcfdc));
51+
52+
# bft: gate ft applied to the delayed cell state.
bft = ElementTimes(ft, dc);
53+
54+
# ct: new cell state, the sum of the two gated terms.
ct = Plus(bft, bit);
55+
56+
# ot: sigmoid gate built from slice G4 plus Wco applied to the current ct, not the delayed one.
Wcoct = DiagTimes(Wco, ct);
57+
ot = Sigmoid( Plus( G4, Wcoct));
58+
59+
# output: gate ot applied to Tanh of the current cell state.
output = ElementTimes(ot, Tanh(ct));
60+
}
61+
62+
# LSTMPComponentBackward: same computation as LSTMPComponentForward, except that
# the recurrence reads output and ct through FutureValue instead of LCPastValue.
#   inputDim  - column count of the input weight wx
#   outputDim - column count of the recurrent weight Wh and size given to the delayed output
#   cellDim   - row count of every gate slice and of the Wci, Wcf, Wco vectors
#   inputx    - input node that is multiplied by wx
#   cellDimX2 - row offset of the third slice (G3)
#   cellDimX3 - row offset of the fourth slice (G4)
#   cellDimX4 - row count of wx, b and Wh, i.e. all four slices stacked
# The final assignment, output, is presumably the value of the macro - confirm
# against the NDL macro rules.
LSTMPComponentBackward(inputDim, outputDim, cellDim, inputx, cellDimX2, cellDimX3, cellDimX4)
63+
{
64+
# Stacked input weight, bias and recurrent weight covering all four slices.
wx = Parameter(cellDimX4, inputDim, init=uniform, initValueScale=1);
65+
b = Parameter(cellDimX4, init=fixedValue, value=0.0);
66+
Wh = Parameter(cellDimX4, outputDim, init=uniform, initValueScale=1);
67+
68+
# Per-cell vectors applied to the cell state through DiagTimes below.
Wci = Parameter(cellDim, init=uniform, initValueScale=1);
69+
Wcf = Parameter(cellDim, init=uniform, initValueScale=1);
70+
Wco = Parameter(cellDim, init=uniform, initValueScale=1);
71+
72+
# Copies of output and ct taken through FutureValue with timeStep 1. Both nodes
# are defined further down in this macro, which closes the recurrent loop.
dh = FutureValue(outputDim, output, timeStep=1);
73+
dc = FutureValue(cellDim, ct, timeStep=1);
74+
75+
wxx = Times(wx, inputx);
76+
wxxpb = Plus(wxx, b);
77+
78+
# NOTE(review): the parameter above is declared as Wh but referenced here as wh.
# This only resolves if NDL name lookup is case-insensitive - confirm.
whh = Times(wh, dh);
79+
80+
# Sum of the input path, the bias and the recurrent path.
wxxpbpwhh = Plus(wxxpb,whh)
81+
82+
# Cut the stacked sum into four slices of cellDim rows each.
G1 = RowSlice(0, cellDim, wxxpbpwhh)
83+
G2 = RowSlice(cellDim, cellDim, wxxpbpwhh)
84+
G3 = RowSlice(cellDimX2, cellDim, wxxpbpwhh);
85+
G4 = RowSlice(cellDimX3, cellDim, wxxpbpwhh);
86+
87+
# it: sigmoid gate built from slice G1 plus Wci applied to dc.
Wcidc = DiagTimes(Wci, dc);
88+
it = Sigmoid (Plus ( G1, Wcidc));
89+
90+
# bit: gate it applied to Tanh of slice G2.
bit = ElementTimes(it, Tanh( G2 ));
91+
92+
# ft: sigmoid gate built from slice G3 plus Wcf applied to dc.
Wcfdc = DiagTimes(Wcf, dc);
93+
ft = Sigmoid( Plus (G3, Wcfdc));
94+
95+
# bft: gate ft applied to dc.
bft = ElementTimes(ft, dc);
96+
97+
# ct: new cell state, the sum of the two gated terms.
ct = Plus(bft, bit);
98+
99+
# ot: sigmoid gate built from slice G4 plus Wco applied to the current ct.
Wcoct = DiagTimes(Wco, ct);
100+
ot = Sigmoid( Plus( G4, Wcoct));
101+
102+
# output: gate ot applied to Tanh of the current cell state.
output = ElementTimes(ot, Tanh(ct));
103+
104+
}
105+
106+
]
107+
108+
# Network section referenced by the run statement at the top of the file.
# It stacks five layers, each made of one LSTMPComponentForward and one
# LSTMPComponentBackward joined with RowStack, followed by an affine output
# layer with cross-entropy and error nodes and a prior-scaled output node.
# NOTE(review): the section name mentions c1024, p256 and x3, which does not
# match the values used below (cellDim 512, five layers) - the name is only a label.
ndlCreateNetwork_LSTMP_c1024_p256_x3=[
109+
110+
#define basic i/o
111+
# The three values between dollar signs are filled in from outside this file.
baseFeatDim=$baseFeatDim$
112+
FeatDim=$featDim$
113+
labelDim=$labelDim$
114+
cellDim=512
115+
# cellDimX2, cellDimX3 and cellDimX4 are 2, 3 and 4 times cellDim. The macros use
# them as RowSlice offsets and as the stacked row count.
cellDimX2=1024 #If BrainScript is used we don't need to pass in following three values
116+
cellDimX3=1536
117+
cellDimX4=2048
118+
119+
# hiddenDim is passed as outputDim of every layer. hiddenDim2 is twice hiddenDim
# and is passed as inputDim of layers 2 to 5, whose input is a RowStack of two
# hiddenDim-sized outputs.
hiddenDim=512
120+
hiddenDim2=1024
121+
122+
features=Input(FeatDim, tag=feature)
123+
labels=Input(labelDim, tag=label)
124+
#feashift=RowSlice(RowSliceStart, baseFeatDim, features); # shift 5 frames right (x_{t+5} -> x_{t} )
125+
126+
127+
featNorm = MeanVarNorm(features)
128+
129+
130+
# layer 1
# NOTE(review): featNorm derives from features, which is declared with FeatDim,
# while the layer-1 weights are sized with baseFeatDim. This presumably requires
# baseFeatDim and featDim to be equal now that the feashift slice is commented
# out - confirm against the calling configuration.
131+
LSTMForwardOutput1 = LSTMPComponentForward(baseFeatDim, hiddenDim, cellDim, featNorm, cellDimX2, cellDimX3, cellDimX4);
132+
133+
LSTMBackwardOutput1 = LSTMPComponentBackward(baseFeatDim, hiddenDim, cellDim, featNorm, cellDimX2, cellDimX3, cellDimX4);
134+
135+
LSTMoutput1 = RowStack(LSTMForwardOutput1, LSTMBackwardOutput1)
136+
137+
# layer 2
138+
LSTMForwardOutput2 = LSTMPComponentForward(hiddenDim2, hiddenDim, cellDim, LSTMoutput1,cellDimX2, cellDimX3, cellDimX4);
139+
140+
LSTMBackwardOutput2 = LSTMPComponentBackward(hiddenDim2, hiddenDim, cellDim, LSTMoutput1,cellDimX2, cellDimX3, cellDimX4);
141+
142+
LSTMoutput2 = RowStack(LSTMForwardOutput2, LSTMBackwardOutput2)
143+
144+
# layer 3
145+
LSTMForwardOutput3 = LSTMPComponentForward(hiddenDim2, hiddenDim, cellDim, LSTMoutput2,cellDimX2, cellDimX3, cellDimX4);
146+
147+
LSTMBackwardOutput3 = LSTMPComponentBackward(hiddenDim2, hiddenDim, cellDim, LSTMoutput2,cellDimX2, cellDimX3, cellDimX4);
148+
149+
LSTMoutput3 = RowStack(LSTMForwardOutput3, LSTMBackwardOutput3)
150+
151+
# layer 4
152+
LSTMForwardOutput4 = LSTMPComponentForward(hiddenDim2, hiddenDim, cellDim, LSTMoutput3,cellDimX2, cellDimX3, cellDimX4);
153+
154+
LSTMBackwardOutput4 = LSTMPComponentBackward(hiddenDim2, hiddenDim, cellDim, LSTMoutput3,cellDimX2, cellDimX3, cellDimX4);
155+
156+
LSTMoutput4 = RowStack(LSTMForwardOutput4, LSTMBackwardOutput4)
157+
158+
# layer 5
159+
LSTMForwardOutput5 = LSTMPComponentForward(hiddenDim2, hiddenDim, cellDim, LSTMoutput4,cellDimX2, cellDimX3, cellDimX4);
160+
161+
LSTMBackwardOutput5 = LSTMPComponentBackward(hiddenDim2, hiddenDim, cellDim, LSTMoutput4,cellDimX2, cellDimX3, cellDimX4);
162+
163+
LSTMoutput5 = RowStack(LSTMForwardOutput5, LSTMBackwardOutput5)
164+
165+
166+
167+
# Affine output layer mapping the last stacked output to labelDim rows.
W = Parameter(labelDim, hiddenDim2, init=uniform, initValueScale=1);
168+
b = Parameter(labelDim, init=fixedvalue, value=0);
169+
LSTMoutputW = Plus(Times(W, LSTMoutput5), b);
170+
171+
172+
# Training criterion and evaluation node, both computed on the affine output.
cr = CrossEntropyWithSoftmax(labels, LSTMoutputW,tag=Criteria);
173+
Err = ErrorPrediction(labels,LSTMoutputW,tag=Eval);
174+
175+
# Output node: the affine output minus the log of the mean label vector.
logPrior = LogPrior(labels)
176+
ScaledLogLikelihood=Minus(LSTMoutputW,logPrior,tag=Output)
177+
178+
]
179+

speech/Bi-SRU_12L.ndl

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
# Entry points of this file: load names the section that holds the macro
# definitions (ndlMacroDefine, below) and run names the section that builds
# the network (ndlCreateNetwork_LSTMP_c1024_p256_x3, further down).
load=ndlMacroDefine
2+
run=ndlCreateNetwork_LSTMP_c1024_p256_x3
3+
4+
ndlMacroDefine=[
5+
# Macro definitions
6+
# MeanVarNorm: feeds x, Mean(x) and InvStdDev(x) into PerDimMeanVarNormalization.
#   x - node to normalize
# Note that xStdDev holds the result of InvStdDev, not a plain standard deviation.
# The final assignment, xNorm, is presumably the value of the macro - confirm
# against the NDL macro rules.
MeanVarNorm(x)=[
7+
xMean = Mean(x);
8+
xStdDev = InvStdDev(x)
9+
xNorm=PerDimMeanVarNormalization(x,xMean,xStdDev)
10+
]
11+
12+
# LogPrior: Log of Mean(labels).
#   labels - label input node
# The result is assigned to a variable carrying the same name as the macro.
LogPrior(labels)
13+
{
14+
Prior=Mean(labels)
15+
LogPrior=Log(Prior)
16+
}
17+
18+
# FastKernelNN: gated recurrent unit whose gates depend only on the input, so the
# sole recurrent dependency is the elementwise cell update read through LCPastValue.
#   inputDim - column count of every weight matrix in this macro
#   cellDim  - row count of every weight, bias, gate and of the cell state
#   inputx   - input node
#   outputDim, cellDimX2, cellDimX3, cellDimX4 - accepted but never referenced in the body
# The final assignment, output, is presumably the value of the macro - confirm
# against the NDL macro rules.
FastKernelNN(inputDim, outputDim, cellDim, inputx, cellDimX2, cellDimX3, cellDimX4)
19+
{
20+
# Weight and bias of the candidate path.
wx = Parameter(cellDim, inputDim, init=uniform, initValueScale=1);
21+
b = Parameter(cellDim, init=fixedValue, value=0.0);
22+
# Vector of ones used to form one-minus-gate terms. Its learning rate multiplier is 0.0.
constOne = Parameter(cellDim, init=fixedValue, value=1.0,learningRateMultiplier=0.0);
23+
24+
# Weight and bias of the gate that mixes the delayed cell with the candidate.
gwx = Parameter(cellDim, inputDim, init=uniform, initValueScale=1);
25+
gb = Parameter(cellDim, init=fixedValue, value=0.0);
26+
27+
# Weight and bias of the gate that mixes Tanh of the cell with the projected input.
hwx = Parameter(cellDim, inputDim, init=uniform, initValueScale=1);
28+
hb = Parameter(cellDim, init=fixedValue, value=0.0);
29+
30+
# Three affine transforms of the same input.
wxx = Times(wx, inputx);
31+
wxxpb = Plus(wxx, b);
32+
33+
gwxx = Times(gwx, inputx);
34+
gwxxpb = Plus(gwxx, gb);
35+
36+
hwxx = Times(hwx, inputx);
37+
hwxxpb = Plus(hwxx, hb);
38+
39+
40+
41+
# cell is lambda times the delayed cell plus (1 - lambda) times the candidate wxxpb.
# dcell refers to cell, defined on the following line, which closes the recurrent loop.
# NOTE(review): the meaning of latencyStep 21 is not visible in this file -
# confirm against the LCPastValue node documentation.
lambda = Sigmoid(gwxxpb)
42+
dcell = LCPastValue(cellDim, cell, timeStep=1, latencyStep=21)
43+
cell = Plus (ElementTimes(lambda, dcell) , ElementTimes(Minus(constOne, lambda), wxxpb))
44+
45+
# output is (1 - hgate) times a separate linear projection of the input (wx2)
# plus hgate times tanh of the cell.
wx2 = Parameter(cellDim, inputDim, init=uniform, initValueScale=1);
46+
hgate = Sigmoid(hwxxpb)
47+
fhgate = Minus(constOne, hgate)
48+
output = Plus(ElementTimes(fhgate, Times(wx2,inputx)), ElementTimes(hgate, tanh(cell)))
49+
50+
}
51+
52+
53+
# BFastKernelNN: same computation as FastKernelNN, except that the cell recurrence
# reads cell through FutureValue instead of LCPastValue.
#   inputDim - column count of every weight matrix in this macro
#   cellDim  - row count of every weight, bias, gate and of the cell state
#   inputx   - input node
#   outputDim, cellDimX2, cellDimX3, cellDimX4 - accepted but never referenced in the body
# The final assignment, output, is presumably the value of the macro - confirm
# against the NDL macro rules.
BFastKernelNN(inputDim, outputDim, cellDim, inputx, cellDimX2, cellDimX3, cellDimX4)
54+
{
55+
# Weight and bias of the candidate path.
wx = Parameter(cellDim, inputDim, init=uniform, initValueScale=1);
56+
b = Parameter(cellDim, init=fixedValue, value=0.0);
57+
# Vector of ones used to form one-minus-gate terms. Its learning rate multiplier is 0.0.
constOne = Parameter(cellDim, init=fixedValue, value=1.0,learningRateMultiplier=0.0);
58+
59+
# Weight and bias of the gate that mixes the delayed cell with the candidate.
gwx = Parameter(cellDim, inputDim, init=uniform, initValueScale=1);
60+
gb = Parameter(cellDim, init=fixedValue, value=0.0);
61+
62+
# Weight and bias of the gate that mixes Tanh of the cell with the projected input.
hwx = Parameter(cellDim, inputDim, init=uniform, initValueScale=1);
63+
hb = Parameter(cellDim, init=fixedValue, value=0.0);
64+
65+
# Three affine transforms of the same input.
wxx = Times(wx, inputx);
66+
wxxpb = Plus(wxx, b);
67+
68+
gwxx = Times(gwx, inputx);
69+
gwxxpb = Plus(gwxx, gb);
70+
71+
hwxx = Times(hwx, inputx);
72+
hwxxpb = Plus(hwxx, hb);
73+
74+
# cell is lambda times dcell plus (1 - lambda) times the candidate wxxpb.
# dcell reads cell, defined on the following line, through FutureValue with timeStep 1.
lambda = Sigmoid(gwxxpb)
75+
dcell = FutureValue(cellDim, cell, timeStep=1)
76+
cell = Plus (ElementTimes(lambda, dcell) , ElementTimes(Minus(constOne, lambda), wxxpb))
77+
78+
# output is (1 - hgate) times a separate linear projection of the input (wx2)
# plus hgate times tanh of the cell.
wx2 = Parameter(cellDim, inputDim, init=uniform, initValueScale=1);
79+
hgate = Sigmoid(hwxxpb)
80+
fhgate = Minus(constOne, hgate)
81+
output = Plus(ElementTimes(fhgate, Times(wx2,inputx)), ElementTimes(hgate, tanh(cell)))
82+
83+
}
84+
85+
]
86+
87+
# Network section referenced by the run statement at the top of the file.
# It stacks twelve layers, each made of one FastKernelNN and one BFastKernelNN
# joined with RowStack, followed by an affine output layer with cross-entropy
# and error nodes and a prior-scaled output node.
# NOTE(review): the section name and the node names say LSTM, but the layers are
# built from FastKernelNN and BFastKernelNN - the names are only labels.
ndlCreateNetwork_LSTMP_c1024_p256_x3=[
88+
89+
#define basic i/o
90+
# The five values between dollar signs are filled in from outside this file.
baseFeatDim=$baseFeatDim$
91+
RowSliceStart1=$RowSliceStart1$
92+
RowSliceStart2=$RowSliceStart2$
93+
FeatDim=$featDim$
94+
labelDim=$labelDim$
95+
cellDim=600
96+
# NOTE(review): cellDimX2, cellDimX3 and cellDimX4 are not 2, 3 and 4 times
# cellDim here. This is harmless only because FastKernelNN and BFastKernelNN
# never reference them.
cellDimX2=2048 #If BrainScript is used we don't need to pass in following three values
97+
cellDimX3=3072
98+
cellDimX4=4096
99+
# hiddenDim is twice cellDim. It is passed as inputDim of layers 2 to 12, whose
# input is a RowStack of two cellDim-sized outputs.
hiddenDim=1200
100+
101+
features=Input(FeatDim, tag=feature)
102+
labels=Input(labelDim, tag=label)
103+
104+
#featNorm = MeanVarNorm(features)
105+
# Two slices of baseFeatDim rows taken from features at different start rows,
# one for the FastKernelNN path and one for the BFastKernelNN path of layer 1.
# NOTE(review): both lines carry the same shift comment although they use
# different offsets - confirm which shift each slice really applies.
feashift1=RowSlice(RowSliceStart1, baseFeatDim, features); # shift 5 frames right (x_{t+5} -> x_{t} )
106+
feashift2=RowSlice(RowSliceStart2, baseFeatDim, features); # shift 5 frames right (x_{t+5} -> x_{t} )
107+
108+
109+
# Each slice is normalized separately.
featNorm1 = MeanVarNorm(feashift1)
110+
featNorm2 = MeanVarNorm(feashift2)
111+
112+
113+
# layer 1
114+
FLSTMoutput1 = FastKernelNN(baseFeatDim, hiddenDim, cellDim, featNorm1, cellDimX2, cellDimX3, cellDimX4);
115+
BLSTMoutput1 = BFastKernelNN(baseFeatDim, hiddenDim, cellDim, featNorm2, cellDimX2, cellDimX3, cellDimX4);
116+
LSTMoutput1 = RowStack(FLSTMoutput1, BLSTMoutput1)
117+
118+
# layer 2
119+
FLSTMoutput2 = FastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput1, cellDimX2, cellDimX3, cellDimX4);
120+
BLSTMoutput2 = BFastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput1, cellDimX2, cellDimX3, cellDimX4);
121+
LSTMoutput2 = RowStack(FLSTMoutput2, BLSTMoutput2)
122+
123+
# layer 3
124+
FLSTMoutput3 = FastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput2, cellDimX2, cellDimX3, cellDimX4);
125+
BLSTMoutput3 = BFastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput2, cellDimX2, cellDimX3, cellDimX4);
126+
LSTMoutput3 = RowStack(FLSTMoutput3, BLSTMoutput3)
127+
128+
# layer 4
FLSTMoutput4 = FastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput3, cellDimX2, cellDimX3, cellDimX4);
129+
BLSTMoutput4 = BFastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput3, cellDimX2, cellDimX3, cellDimX4);
130+
LSTMoutput4 = RowStack(FLSTMoutput4, BLSTMoutput4)
131+
132+
# layer 5
FLSTMoutput5 = FastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput4, cellDimX2, cellDimX3, cellDimX4);
133+
BLSTMoutput5 = BFastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput4, cellDimX2, cellDimX3, cellDimX4);
134+
LSTMoutput5 = RowStack(FLSTMoutput5, BLSTMoutput5)
135+
136+
# layer 6
FLSTMoutput6 = FastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput5, cellDimX2, cellDimX3, cellDimX4);
137+
BLSTMoutput6 = BFastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput5, cellDimX2, cellDimX3, cellDimX4);
138+
LSTMoutput6 = RowStack(FLSTMoutput6, BLSTMoutput6)
139+
140+
# layer 7
FLSTMoutput7 = FastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput6, cellDimX2, cellDimX3, cellDimX4);
141+
BLSTMoutput7 = BFastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput6, cellDimX2, cellDimX3, cellDimX4);
142+
LSTMoutput7 = RowStack(FLSTMoutput7, BLSTMoutput7)
143+
144+
# layer 8
FLSTMoutput8 = FastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput7, cellDimX2, cellDimX3, cellDimX4);
145+
BLSTMoutput8 = BFastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput7, cellDimX2, cellDimX3, cellDimX4);
146+
LSTMoutput8 = RowStack(FLSTMoutput8, BLSTMoutput8)
147+
148+
# layer 9
FLSTMoutput9 = FastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput8, cellDimX2, cellDimX3, cellDimX4);
149+
BLSTMoutput9 = BFastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput8, cellDimX2, cellDimX3, cellDimX4);
150+
LSTMoutput9 = RowStack(FLSTMoutput9, BLSTMoutput9)
151+
152+
# layer 10
FLSTMoutput10 = FastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput9, cellDimX2, cellDimX3, cellDimX4);
153+
BLSTMoutput10 = BFastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput9, cellDimX2, cellDimX3, cellDimX4);
154+
LSTMoutput10 = RowStack(FLSTMoutput10, BLSTMoutput10)
155+
156+
# layer 11
FLSTMoutput11 = FastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput10, cellDimX2, cellDimX3, cellDimX4);
157+
BLSTMoutput11 = BFastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput10, cellDimX2, cellDimX3, cellDimX4);
158+
LSTMoutput11 = RowStack(FLSTMoutput11, BLSTMoutput11)
159+
160+
# layer 12
FLSTMoutput12 = FastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput11, cellDimX2, cellDimX3, cellDimX4);
161+
BLSTMoutput12 = BFastKernelNN(hiddenDim, hiddenDim, cellDim, LSTMoutput11, cellDimX2, cellDimX3, cellDimX4);
162+
LSTMoutput12 = RowStack(FLSTMoutput12, BLSTMoutput12)
163+
164+
165+
# Affine output layer mapping the last stacked output to labelDim rows.
W = Parameter(labelDim, hiddenDim, init=uniform, initValueScale=1);
166+
b = Parameter(labelDim, init=fixedvalue, value=0);
167+
LSTMoutputW = Plus(Times(W, LSTMoutput12), b);
168+
169+
# Training criterion and evaluation node, both computed on the affine output.
cr = CrossEntropyWithSoftmax(labels, LSTMoutputW,tag=Criteria);
170+
Err = ErrorPrediction(labels,LSTMoutputW,tag=Eval);
171+
172+
# Output node: the affine output minus the log of the mean label vector.
logPrior = LogPrior(labels)
173+
ScaledLogLikelihood=Minus(LSTMoutputW,logPrior,tag=Output)
174+
175+
]
176+

0 commit comments

Comments
 (0)