Skip to content

Commit b5f91d5

Browse files
committed
Add functional conversion of SpatialConvolutionMM
1 parent 130ed2c commit b5f91d5

File tree

3 files changed

+185
-73
lines changed

3 files changed

+185
-73
lines changed

generic/SpatialConvolutionMM.c

Lines changed: 152 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,152 @@
66
# include <windows.h>
77
#endif
88

9-
#include "unfold.h"
109

1110

12-
static void nn_(SpatialConvolutionMM_updateOutput_frame)(THTensor *input, THTensor *output, THTensor *weight, THTensor *bias, THTensor *finput,
11+
/* note: due to write issues, this one cannot be parallelized as well as unfolded_copy */
12+
static void THNN_(unfolded_acc)(THTensor *finput, THTensor *input,
13+
int kW, int kH,
14+
int dW, int dH,
15+
int padW, int padH,
16+
int nInputPlane,
17+
int inputWidth, int inputHeight,
18+
int outputWidth, int outputHeight)
19+
{
20+
#ifdef _WIN32
21+
LONG_PTR nip;
22+
#else
23+
size_t nip;
24+
#endif
25+
26+
real *input_data = THTensor_(data)(input);
27+
real *finput_data = THTensor_(data)(finput);
28+
29+
#pragma omp parallel for private(nip)
30+
for(nip = 0; nip < nInputPlane; nip++)
31+
{
32+
size_t kw, kh, y, x;
33+
long long ix = 0, iy = 0;
34+
for(kh = 0; kh < kH; kh++)
35+
{
36+
for(kw = 0; kw < kW; kw++)
37+
{
38+
real *src = finput_data + nip*(kH*kW*outputHeight*outputWidth) + kh*(kW*outputHeight*outputWidth) + kw*(outputHeight*outputWidth);
39+
real *dst = input_data + nip*(inputHeight*inputWidth);
40+
if (padW > 0 || padH > 0) {
41+
size_t lpad,rpad;
42+
for(y = 0; y < outputHeight; y++) {
43+
iy = (long long)(y*dH - padH + kh);
44+
if (iy < 0 || iy >= inputHeight) {
45+
} else {
46+
if (dW==1){
47+
ix = (long long)(0 - padW + kw);
48+
lpad = fmaxf(0,padW-kw);
49+
rpad = fmaxf(0,padW-(kW-kw-1));
50+
THVector_(add)(dst+(size_t)(iy*inputWidth+ix+lpad), src+(size_t)(y*outputWidth+lpad), 1, outputWidth - lpad - rpad); /* note: THVector_add could handle 1 value better */
51+
}
52+
else{
53+
for (x=0; x<outputWidth; x++){
54+
ix = (long long)(x*dW - padW + kw);
55+
if (ix < 0 || ix >= inputWidth){
56+
}else
57+
THVector_(add)(dst+(size_t)(iy*inputWidth+ix), src+(size_t)(y*outputWidth+x), 1, 1);
58+
}
59+
}
60+
}
61+
}
62+
} else {
63+
for(y = 0; y < outputHeight; y++) {
64+
iy = (long long)(y*dH + kh);
65+
ix = (long long)(0 + kw);
66+
if (dW == 1 )
67+
THVector_(add)(dst+(size_t)(iy*inputWidth+ix), src+(size_t)(y*outputWidth), 1, outputWidth); /* note: THVector_add could handle 1 value better */
68+
else{
69+
for(x = 0; x < outputWidth; x++)
70+
THVector_(add)(dst+(size_t)(iy*inputWidth+ix+x*dW), src+(size_t)(y*outputWidth+x), 1, 1);
71+
}
72+
}
73+
}
74+
}
75+
}
76+
}
77+
}
78+
79+
/*
 * Unfold `input` into `finput`, writing zeros where a kernel tap falls into
 * the padding.
 *
 * Indexing used below (both buffers are addressed through their raw data
 * pointers, so they are assumed to be contiguous -- TODO confirm at callers):
 *   input  : [nInputPlane][inputHeight][inputWidth]
 *   finput : [nInputPlane][kH][kW][outputHeight][outputWidth]
 *
 * finput[nip][kh][kw][y][x] receives
 * input[nip][y*dH - padH + kh][x*dW - padW + kw], or 0 when that coordinate
 * is outside the input plane.
 *
 * Each (nip, kh, kw) triple owns a disjoint slice of finput, so the flattened
 * loop over k = (nip*kH + kh)*kW + kw is parallelised directly.
 *
 * All coordinate and padding arithmetic is done in signed types. Previously
 * lpad/rpad/kw were size_t, so padW - kw wrapped around when negative, and the
 * "row is entirely padding" test (outputWidth - rpad - lpad <= 0) could never
 * see a negative width, falling through to a memcpy with a huge length.
 */
static void THNN_(unfolded_copy)(THTensor *finput, THTensor *input,
                                 int kW, int kH,
                                 int dW, int dH,
                                 int padW, int padH,
                                 int nInputPlane,
                                 int inputWidth, int inputHeight,
                                 int outputWidth, int outputHeight)
{
  long k;
  real *input_data = THTensor_(data)(input);
  real *finput_data = THTensor_(data)(finput);

  /* Element counts of one output / input plane, widened to avoid int overflow. */
  const long long outputPlane = (long long)outputHeight * outputWidth;
  const long long inputPlane = (long long)inputHeight * inputWidth;
  const size_t rowBytes = sizeof(real) * (size_t)outputWidth;
  const long nSlices = (long)nInputPlane * kH * kW;

#pragma omp parallel for private(k)
  for(k = 0; k < nSlices; k++) {
    /* Decompose the flat index k into (nip, kh, kw). */
    const long long nip = k / (kH * kW);
    const int rest = (int)(k % (kH * kW));
    const int kh = rest / kW;
    const int kw = rest % kW;
    int x, y;
    long long ix, iy;

    /* k == (nip*kH + kh)*kW + kw, so slice finput[nip][kh][kw] starts here. */
    real *dst = finput_data + (long long)k * outputPlane;
    real *src = input_data + nip * inputPlane;

    if (padW > 0 || padH > 0) {
      for(y = 0; y < outputHeight; y++) {
        real *row = dst + (long long)y * outputWidth;

        iy = (long long)y * dH - padH + kh;
        if (iy < 0 || iy >= inputHeight) {
          /* whole row lies in the vertical padding */
          memset(row, 0, rowBytes);
        } else if (dW == 1) {
          /* With unit stride the valid columns form one contiguous run,
             flanked by lpad zero columns on the left and rpad on the right. */
          const int lpad = (padW > kw) ? (padW - kw) : 0;
          const int rpad = (padW > kW - kw - 1) ? (padW - (kW - kw - 1)) : 0;
          const int n = outputWidth - lpad - rpad;

          ix = (long long)kw - padW;
          if (n <= 0) {
            /* padding covers the entire row */
            memset(row, 0, rowBytes);
          } else {
            if (lpad > 0)
              memset(row, 0, sizeof(real) * (size_t)lpad);
            memcpy(row + lpad, src + (iy * inputWidth + ix + lpad), sizeof(real) * (size_t)n);
            if (rpad > 0)
              memset(row + outputWidth - rpad, 0, sizeof(real) * (size_t)rpad);
          }
        } else {
          for (x = 0; x < outputWidth; x++) {
            ix = (long long)x * dW - padW + kw;
            if (ix < 0 || ix >= inputWidth)
              memset(row + x, 0, sizeof(real));
            else
              memcpy(row + x, src + (iy * inputWidth + ix), sizeof(real));
          }
        }
      }
    } else {
      /* No padding: every (y, x) maps to a valid input coordinate. */
      for(y = 0; y < outputHeight; y++) {
        real *row = dst + (long long)y * outputWidth;

        iy = (long long)y * dH + kh;
        ix = kw;
        if (dW == 1) {
          memcpy(row, src + (iy * inputWidth + ix), rowBytes);
        } else {
          for (x = 0; x < outputWidth; x++)
            memcpy(row + x, src + (iy * inputWidth + ix + (long long)x * dW), sizeof(real));
        }
      }
    }
  }
}
145+
146+
static void THNN_(SpatialConvolutionMM_updateOutput_frame)(THTensor *input, THTensor *output, THTensor *weight, THTensor *bias, THTensor *finput,
13147
int kW, int kH, int dW, int dH, int padW, int padH,
14148
long nInputPlane, long inputWidth, long inputHeight,
15149
long nOutputPlane, long outputWidth, long outputHeight)
16150
{
17151
long i;
18152
THTensor *output2d;
19153

20-
nn_(unfolded_copy)(finput, input, kW, kH, dW, dH, padW, padH, nInputPlane, inputWidth, inputHeight, outputWidth, outputHeight);
154+
THNN_(unfolded_copy)(finput, input, kW, kH, dW, dH, padW, padH, nInputPlane, inputWidth, inputHeight, outputWidth, outputHeight);
21155

22156
output2d = THTensor_(newWithStorage2d)(output->storage, output->storageOffset,
23157
nOutputPlane, -1,
@@ -31,21 +165,8 @@ static void nn_(SpatialConvolutionMM_updateOutput_frame)(THTensor *input, THTens
31165
THTensor_(free)(output2d);
32166
}
33167

34-
static int nn_(SpatialConvolutionMM_updateOutput)(lua_State *L)
168+
void THNN_(SpatialConvolutionMM_updateOutput)(THNNState *state, THTensor *input, THTensor *output, THTensor *weight, THTensor *bias, THTensor* finput, int kW, int kH, int dW, int dH, int padW, int padH)
35169
{
36-
THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
37-
int kW = luaT_getfieldcheckint(L, 1, "kW");
38-
int kH = luaT_getfieldcheckint(L, 1, "kH");
39-
int dW = luaT_getfieldcheckint(L, 1, "dW");
40-
int dH = luaT_getfieldcheckint(L, 1, "dH");
41-
int padW = luaT_getfieldcheckint(L, 1, "padW");
42-
int padH = luaT_getfieldcheckint(L, 1, "padH");
43-
44-
THTensor *finput = luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
45-
THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
46-
THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
47-
THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
48-
49170
int dimf = 0;
50171
int dimw = 2;
51172
int dimh = 1;
@@ -57,8 +178,7 @@ static int nn_(SpatialConvolutionMM_updateOutput)(lua_State *L)
57178
long outputWidth;
58179
long outputHeight;
59180

60-
luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected");
61-
181+
THArgCheck( input->nDimension == 3 || input->nDimension == 4, 1, "3D or 4D (batch mode) tensor expected");
62182

63183
if (input->nDimension == 4) {
64184
dimf++;
@@ -85,7 +205,7 @@ static int nn_(SpatialConvolutionMM_updateOutput)(lua_State *L)
85205
THTensor_(resize2d)(finput, kW*kH*nInputPlane, outputHeight*outputWidth);
86206
THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
87207

88-
nn_(SpatialConvolutionMM_updateOutput_frame)(input, output, weight, bias, finput,
208+
THNN_(SpatialConvolutionMM_updateOutput_frame)(input, output, weight, bias, finput,
89209
kW, kH, dW, dH, padW, padH,
90210
nInputPlane, inputWidth, inputHeight,
91211
nOutputPlane, outputWidth, outputHeight);
@@ -105,7 +225,7 @@ static int nn_(SpatialConvolutionMM_updateOutput)(lua_State *L)
105225
THTensor *output_t = THTensor_(newSelect)(output, 0, t);
106226
THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
107227

108-
nn_(SpatialConvolutionMM_updateOutput_frame)(input_t, output_t, weight, bias, finput_t,
228+
THNN_(SpatialConvolutionMM_updateOutput_frame)(input_t, output_t, weight, bias, finput_t,
109229
kW, kH, dW, dH, padW, padH,
110230
nInputPlane, inputWidth, inputHeight,
111231
nOutputPlane, outputWidth, outputHeight);
@@ -115,12 +235,10 @@ static int nn_(SpatialConvolutionMM_updateOutput)(lua_State *L)
115235
THTensor_(free)(finput_t);
116236
}
117237
}
118-
119-
return 1;
120238
}
121239

122240

123-
static void nn_(SpatialConvolutionMM_updateGradInput_frame)(THTensor *gradInput, THTensor *gradOutput, THTensor *weight, THTensor *fgradInput,
241+
static void THNN_(SpatialConvolutionMM_updateGradInput_frame)(THTensor *gradInput, THTensor *gradOutput, THTensor *weight, THTensor *fgradInput,
124242
int kW, int kH, int dW, int dH, int padW, int padH)
125243
{
126244
THTensor *gradOutput2d = THTensor_(newWithStorage2d)(gradOutput->storage, gradOutput->storageOffset,
@@ -131,25 +249,12 @@ static void nn_(SpatialConvolutionMM_updateGradInput_frame)(THTensor *gradInput,
131249

132250
THTensor_(zero)(gradInput);
133251

134-
nn_(unfolded_acc)(fgradInput, gradInput, kW, kH, dW, dH, padW, padH, gradInput->size[0], gradInput->size[2], gradInput->size[1], gradOutput->size[2], gradOutput->size[1]);
252+
THNN_(unfolded_acc)(fgradInput, gradInput, kW, kH, dW, dH, padW, padH, gradInput->size[0], gradInput->size[2], gradInput->size[1], gradOutput->size[2], gradOutput->size[1]);
135253
}
136254

137-
static int nn_(SpatialConvolutionMM_updateGradInput)(lua_State *L)
255+
void THNN_(SpatialConvolutionMM_updateGradInput)(THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradInput, THTensor *weight, THTensor *bias, THTensor *finput, THTensor *fgradInput, int kW, int kH, int dW, int dH, int padW, int padH)
138256
{
139-
THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
140-
THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
141-
int kW = luaT_getfieldcheckint(L, 1, "kW");
142-
int kH = luaT_getfieldcheckint(L, 1, "kH");
143-
int dW = luaT_getfieldcheckint(L, 1, "dW");
144-
int dH = luaT_getfieldcheckint(L, 1, "dH");
145-
int padW = luaT_getfieldcheckint(L, 1, "padW");
146-
int padH = luaT_getfieldcheckint(L, 1, "padH");
147-
int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
148-
149-
THTensor *finput = luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
150-
THTensor *fgradInput = luaT_getfieldcheckudata(L, 1, "fgradInput", torch_Tensor);
151-
THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
152-
THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
257+
long nOutputPlane = weight->size[0];
153258

154259
THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1, "Number of output features is not equal to nOutputPlane" );
155260

@@ -159,7 +264,7 @@ static int nn_(SpatialConvolutionMM_updateGradInput)(lua_State *L)
159264

160265
if(input->nDimension == 3)
161266
{
162-
nn_(SpatialConvolutionMM_updateGradInput_frame)(gradInput, gradOutput, weight, fgradInput, kW, kH, dW, dH, padW, padH);
267+
THNN_(SpatialConvolutionMM_updateGradInput_frame)(gradInput, gradOutput, weight, fgradInput, kW, kH, dW, dH, padW, padH);
163268
}
164269
else
165270
{
@@ -173,7 +278,7 @@ static int nn_(SpatialConvolutionMM_updateGradInput)(lua_State *L)
173278
THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
174279
THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t);
175280

176-
nn_(SpatialConvolutionMM_updateGradInput_frame)(gradInput_t, gradOutput_t, weight, fgradInput_t, kW, kH, dW, dH, padW, padH);
281+
THNN_(SpatialConvolutionMM_updateGradInput_frame)(gradInput_t, gradOutput_t, weight, fgradInput_t, kW, kH, dW, dH, padW, padH);
177282

178283
THTensor_(free)(gradInput_t);
179284
THTensor_(free)(gradOutput_t);
@@ -182,11 +287,9 @@ static int nn_(SpatialConvolutionMM_updateGradInput)(lua_State *L)
182287
}
183288

184289
THTensor_(transpose)(weight, weight, 0, 1);
185-
186-
return 1;
187290
}
188291

189-
static void nn_(SpatialConvolutionMM_accGradParameters_frame)(THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias, THTensor *finput,
292+
static void THNN_(SpatialConvolutionMM_accGradParameters_frame)(THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias, THTensor *finput,
190293
real scale)
191294
{
192295
long i;
@@ -211,22 +314,14 @@ static void nn_(SpatialConvolutionMM_accGradParameters_frame)(THTensor *gradOutp
211314
THTensor_(free)(gradOutput2d);
212315
}
213316

214-
static int nn_(SpatialConvolutionMM_accGradParameters)(lua_State *L)
317+
void THNN_(SpatialConvolutionMM_accGradParameters)(THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias, THTensor *finput, real scale)
215318
{
216-
THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
217-
THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
218-
real scale = luaL_optnumber(L, 4, 1);
219-
int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
220-
221-
THTensor *finput = luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
222-
THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor);
223-
THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);
224-
319+
long nOutputPlane = gradWeight->size[0];
225320
THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1, "Number of output features is not equal to nOutputPlane" );
226321

227322
if(input->nDimension == 3)
228323
{
229-
nn_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale);
324+
THNN_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale);
230325
}
231326
else
232327
{
@@ -238,28 +333,12 @@ static int nn_(SpatialConvolutionMM_accGradParameters)(lua_State *L)
238333
THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
239334
THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
240335

241-
nn_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput_t, gradWeight, gradBias, finput_t, scale);
336+
THNN_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput_t, gradWeight, gradBias, finput_t, scale);
242337

243338
THTensor_(free)(gradOutput_t);
244339
THTensor_(free)(finput_t);
245340
}
246341
}
247-
248-
return 0;
249-
}
250-
251-
static const struct luaL_Reg nn_(SpatialConvolutionMM__) [] = {
252-
{"SpatialConvolutionMM_updateOutput", nn_(SpatialConvolutionMM_updateOutput)},
253-
{"SpatialConvolutionMM_updateGradInput", nn_(SpatialConvolutionMM_updateGradInput)},
254-
{"SpatialConvolutionMM_accGradParameters", nn_(SpatialConvolutionMM_accGradParameters)},
255-
{NULL, NULL}
256-
};
257-
258-
static void nn_(SpatialConvolutionMM_init)(lua_State *L)
259-
{
260-
luaT_pushmetatable(L, torch_Tensor);
261-
luaT_registeratname(L, nn_(SpatialConvolutionMM__), "nn");
262-
lua_pop(L,1);
263342
}
264343

265344
#endif

generic/THNN.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,4 +152,34 @@ TH_API void THNN_(LookupTable_accGradParameters)(
152152
THTensor *sorted,
153153
THTensor *indices);
154154

155+
TH_API void THNN_(SpatialConvolutionMM_updateOutput)(
156+
THNNState *state,
157+
THTensor *input,
158+
THTensor *output,
159+
THTensor *weight,
160+
THTensor *bias,
161+
THTensor* finput,
162+
int kW, int kH,
163+
int dW, int dH,
164+
int padW, int padH);
165+
TH_API void THNN_(SpatialConvolutionMM_updateGradInput)(
166+
THNNState *state,
167+
THTensor *input,
168+
THTensor *gradOutput,
169+
THTensor *gradInput,
170+
THTensor *weight,
171+
THTensor *bias,
172+
THTensor *finput,
173+
THTensor *fgradInput,
174+
int kW, int kH,
175+
int dW, int dH,
176+
int padW, int padH);
177+
TH_API void THNN_(SpatialConvolutionMM_accGradParameters)(
178+
THNNState *state,
179+
THTensor *input,
180+
THTensor *gradOutput,
181+
THTensor *gradWeight,
182+
THTensor *gradBias,
183+
THTensor *finput,
184+
real scale);
155185
#endif

init.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,6 @@
3939

4040
#include "generic/LookupTable.c"
4141
#include "THGenerateFloatTypes.h"
42+
43+
#include "generic/SpatialConvolutionMM.c"
44+
#include "THGenerateFloatTypes.h"

0 commit comments

Comments
 (0)