66# include <windows.h>
77#endif
88
9- #include "unfold.h"
109
1110
12- static void nn_ (SpatialConvolutionMM_updateOutput_frame )(THTensor * input , THTensor * output , THTensor * weight , THTensor * bias , THTensor * finput ,
11+ /* note: due to write issues, this one cannot be parallelized as well as unfolded_copy */
12+ static void THNN_ (unfolded_acc )(THTensor * finput , THTensor * input ,
13+ int kW , int kH ,
14+ int dW , int dH ,
15+ int padW , int padH ,
16+ int nInputPlane ,
17+ int inputWidth , int inputHeight ,
18+ int outputWidth , int outputHeight )
19+ {
20+ #ifdef _WIN32
21+ LONG_PTR nip ;
22+ #else
23+ size_t nip ;
24+ #endif
25+
26+ real * input_data = THTensor_ (data )(input );
27+ real * finput_data = THTensor_ (data )(finput );
28+
29+ #pragma omp parallel for private(nip)
30+ for (nip = 0 ; nip < nInputPlane ; nip ++ )
31+ {
32+ size_t kw , kh , y , x ;
33+ long long ix = 0 , iy = 0 ;
34+ for (kh = 0 ; kh < kH ; kh ++ )
35+ {
36+ for (kw = 0 ; kw < kW ; kw ++ )
37+ {
38+ real * src = finput_data + nip * (kH * kW * outputHeight * outputWidth ) + kh * (kW * outputHeight * outputWidth ) + kw * (outputHeight * outputWidth );
39+ real * dst = input_data + nip * (inputHeight * inputWidth );
40+ if (padW > 0 || padH > 0 ) {
41+ size_t lpad ,rpad ;
42+ for (y = 0 ; y < outputHeight ; y ++ ) {
43+ iy = (long long )(y * dH - padH + kh );
44+ if (iy < 0 || iy >= inputHeight ) {
45+ } else {
46+ if (dW == 1 ){
47+ ix = (long long )(0 - padW + kw );
48+ lpad = fmaxf (0 ,padW - kw );
49+ rpad = fmaxf (0 ,padW - (kW - kw - 1 ));
50+ THVector_ (add )(dst + (size_t )(iy * inputWidth + ix + lpad ), src + (size_t )(y * outputWidth + lpad ), 1 , outputWidth - lpad - rpad ); /* note: THVector_add could handle 1 value better */
51+ }
52+ else {
53+ for (x = 0 ; x < outputWidth ; x ++ ){
54+ ix = (long long )(x * dW - padW + kw );
55+ if (ix < 0 || ix >= inputWidth ){
56+ }else
57+ THVector_ (add )(dst + (size_t )(iy * inputWidth + ix ), src + (size_t )(y * outputWidth + x ), 1 , 1 );
58+ }
59+ }
60+ }
61+ }
62+ } else {
63+ for (y = 0 ; y < outputHeight ; y ++ ) {
64+ iy = (long long )(y * dH + kh );
65+ ix = (long long )(0 + kw );
66+ if (dW == 1 )
67+ THVector_ (add )(dst + (size_t )(iy * inputWidth + ix ), src + (size_t )(y * outputWidth ), 1 , outputWidth ); /* note: THVector_add could handle 1 value better */
68+ else {
69+ for (x = 0 ; x < outputWidth ; x ++ )
70+ THVector_ (add )(dst + (size_t )(iy * inputWidth + ix + x * dW ), src + (size_t )(y * outputWidth + x ), 1 , 1 );
71+ }
72+ }
73+ }
74+ }
75+ }
76+ }
77+ }
78+
79+ static void THNN_ (unfolded_copy )(THTensor * finput , THTensor * input ,
80+ int kW , int kH ,
81+ int dW , int dH ,
82+ int padW , int padH ,
83+ int nInputPlane ,
84+ int inputWidth , int inputHeight ,
85+ int outputWidth , int outputHeight )
86+ {
87+ long k ;
88+ real * input_data = THTensor_ (data )(input );
89+ real * finput_data = THTensor_ (data )(finput );
90+
91+ #pragma omp parallel for private(k)
92+ for (k = 0 ; k < nInputPlane * kH * kW ; k ++ ) {
93+ size_t nip = k / (kH * kW );
94+ size_t rest = k % (kH * kW );
95+ size_t kh = rest / kW ;
96+ size_t kw = rest % kW ;
97+ size_t x ,y ;
98+ long long ix ,iy ;
99+ real * dst = finput_data + nip * (kH * kW * outputHeight * outputWidth ) + kh * (kW * outputHeight * outputWidth ) + kw * (outputHeight * outputWidth );
100+ real * src = input_data + nip * (inputHeight * inputWidth );
101+ if (padW > 0 || padH > 0 ) {
102+ size_t lpad ,rpad ;
103+ for (y = 0 ; y < outputHeight ; y ++ ) {
104+ iy = (long long )(y * dH - padH + kh );
105+ if (iy < 0 || iy >= inputHeight ) {
106+ memset (dst + y * outputWidth , 0 , sizeof (real )* outputWidth );
107+ } else {
108+ if (dW == 1 ){
109+ ix = (long long )(0 - padW + kw );
110+ lpad = fmaxf (0 ,padW - kw );
111+ rpad = fmaxf (0 ,padW - (kW - kw - 1 ));
112+ if (outputWidth - rpad - lpad <= 0 ) {
113+ memset (dst + (size_t )(y * outputWidth ), 0 , sizeof (real )* outputWidth );
114+ } else {
115+ if (lpad > 0 ) memset (dst + y * outputWidth , 0 , sizeof (real )* lpad );
116+ memcpy (dst + (size_t )(y * outputWidth + lpad ), src + (size_t )(iy * inputWidth + ix + lpad ), sizeof (real )* (outputWidth - rpad - lpad ));
117+ if (rpad > 0 ) memset (dst + y * outputWidth + outputWidth - rpad , 0 , sizeof (real )* rpad );
118+ }
119+ }
120+ else {
121+ for (x = 0 ; x < outputWidth ; x ++ ){
122+ ix = (long long )(x * dW - padW + kw );
123+ if (ix < 0 || ix >= inputWidth )
124+ memset (dst + (size_t )(y * outputWidth + x ), 0 , sizeof (real )* 1 );
125+ else
126+ memcpy (dst + (size_t )(y * outputWidth + x ), src + (size_t )(iy * inputWidth + ix ), sizeof (real )* (1 ));
127+ }
128+ }
129+ }
130+ }
131+ } else {
132+ for (y = 0 ; y < outputHeight ; y ++ ) {
133+ iy = (long long )(y * dH + kh );
134+ ix = (long long )(0 + kw );
135+ if (dW == 1 )
136+ memcpy (dst + (size_t )(y * outputWidth ), src + (size_t )(iy * inputWidth + ix ), sizeof (real )* outputWidth );
137+ else {
138+ for (x = 0 ; x < outputWidth ; x ++ )
139+ memcpy (dst + (size_t )(y * outputWidth + x ), src + (size_t )(iy * inputWidth + ix + x * dW ), sizeof (real )* (1 ));
140+ }
141+ }
142+ }
143+ }
144+ }
145+
146+ static void THNN_ (SpatialConvolutionMM_updateOutput_frame )(THTensor * input , THTensor * output , THTensor * weight , THTensor * bias , THTensor * finput ,
13147 int kW , int kH , int dW , int dH , int padW , int padH ,
14148 long nInputPlane , long inputWidth , long inputHeight ,
15149 long nOutputPlane , long outputWidth , long outputHeight )
16150{
17151 long i ;
18152 THTensor * output2d ;
19153
20- nn_ (unfolded_copy )(finput , input , kW , kH , dW , dH , padW , padH , nInputPlane , inputWidth , inputHeight , outputWidth , outputHeight );
154+ THNN_ (unfolded_copy )(finput , input , kW , kH , dW , dH , padW , padH , nInputPlane , inputWidth , inputHeight , outputWidth , outputHeight );
21155
22156 output2d = THTensor_ (newWithStorage2d )(output -> storage , output -> storageOffset ,
23157 nOutputPlane , -1 ,
@@ -31,21 +165,8 @@ static void nn_(SpatialConvolutionMM_updateOutput_frame)(THTensor *input, THTens
31165 THTensor_ (free )(output2d );
32166}
33167
34- static int nn_ (SpatialConvolutionMM_updateOutput )(lua_State * L )
168+ void THNN_ (SpatialConvolutionMM_updateOutput )(THNNState * state , THTensor * input , THTensor * output , THTensor * weight , THTensor * bias , THTensor * finput , int kW , int kH , int dW , int dH , int padW , int padH )
35169{
36- THTensor * input = luaT_checkudata (L , 2 , torch_Tensor );
37- int kW = luaT_getfieldcheckint (L , 1 , "kW" );
38- int kH = luaT_getfieldcheckint (L , 1 , "kH" );
39- int dW = luaT_getfieldcheckint (L , 1 , "dW" );
40- int dH = luaT_getfieldcheckint (L , 1 , "dH" );
41- int padW = luaT_getfieldcheckint (L , 1 , "padW" );
42- int padH = luaT_getfieldcheckint (L , 1 , "padH" );
43-
44- THTensor * finput = luaT_getfieldcheckudata (L , 1 , "finput" , torch_Tensor );
45- THTensor * weight = luaT_getfieldcheckudata (L , 1 , "weight" , torch_Tensor );
46- THTensor * bias = luaT_getfieldcheckudata (L , 1 , "bias" , torch_Tensor );
47- THTensor * output = luaT_getfieldcheckudata (L , 1 , "output" , torch_Tensor );
48-
49170 int dimf = 0 ;
50171 int dimw = 2 ;
51172 int dimh = 1 ;
@@ -57,8 +178,7 @@ static int nn_(SpatialConvolutionMM_updateOutput)(lua_State *L)
57178 long outputWidth ;
58179 long outputHeight ;
59180
60- luaL_argcheck (L , input -> nDimension == 3 || input -> nDimension == 4 , 2 , "3D or 4D(batch mode) tensor expected" );
61-
181+ THArgCheck ( input -> nDimension == 3 || input -> nDimension == 4 , 1 , "3D or 4D (batch mode) tensor expected" );
62182
63183 if (input -> nDimension == 4 ) {
64184 dimf ++ ;
@@ -85,7 +205,7 @@ static int nn_(SpatialConvolutionMM_updateOutput)(lua_State *L)
85205 THTensor_ (resize2d )(finput , kW * kH * nInputPlane , outputHeight * outputWidth );
86206 THTensor_ (resize3d )(output , nOutputPlane , outputHeight , outputWidth );
87207
88- nn_ (SpatialConvolutionMM_updateOutput_frame )(input , output , weight , bias , finput ,
208+ THNN_ (SpatialConvolutionMM_updateOutput_frame )(input , output , weight , bias , finput ,
89209 kW , kH , dW , dH , padW , padH ,
90210 nInputPlane , inputWidth , inputHeight ,
91211 nOutputPlane , outputWidth , outputHeight );
@@ -105,7 +225,7 @@ static int nn_(SpatialConvolutionMM_updateOutput)(lua_State *L)
105225 THTensor * output_t = THTensor_ (newSelect )(output , 0 , t );
106226 THTensor * finput_t = THTensor_ (newSelect )(finput , 0 , t );
107227
108- nn_ (SpatialConvolutionMM_updateOutput_frame )(input_t , output_t , weight , bias , finput_t ,
228+ THNN_ (SpatialConvolutionMM_updateOutput_frame )(input_t , output_t , weight , bias , finput_t ,
109229 kW , kH , dW , dH , padW , padH ,
110230 nInputPlane , inputWidth , inputHeight ,
111231 nOutputPlane , outputWidth , outputHeight );
@@ -115,12 +235,10 @@ static int nn_(SpatialConvolutionMM_updateOutput)(lua_State *L)
115235 THTensor_ (free )(finput_t );
116236 }
117237 }
118-
119- return 1 ;
120238}
121239
122240
123- static void nn_ (SpatialConvolutionMM_updateGradInput_frame )(THTensor * gradInput , THTensor * gradOutput , THTensor * weight , THTensor * fgradInput ,
241+ static void THNN_ (SpatialConvolutionMM_updateGradInput_frame )(THTensor * gradInput , THTensor * gradOutput , THTensor * weight , THTensor * fgradInput ,
124242 int kW , int kH , int dW , int dH , int padW , int padH )
125243{
126244 THTensor * gradOutput2d = THTensor_ (newWithStorage2d )(gradOutput -> storage , gradOutput -> storageOffset ,
@@ -131,25 +249,12 @@ static void nn_(SpatialConvolutionMM_updateGradInput_frame)(THTensor *gradInput,
131249
132250 THTensor_ (zero )(gradInput );
133251
134- nn_ (unfolded_acc )(fgradInput , gradInput , kW , kH , dW , dH , padW , padH , gradInput -> size [0 ], gradInput -> size [2 ], gradInput -> size [1 ], gradOutput -> size [2 ], gradOutput -> size [1 ]);
252+ THNN_ (unfolded_acc )(fgradInput , gradInput , kW , kH , dW , dH , padW , padH , gradInput -> size [0 ], gradInput -> size [2 ], gradInput -> size [1 ], gradOutput -> size [2 ], gradOutput -> size [1 ]);
135253}
136254
137- static int nn_ (SpatialConvolutionMM_updateGradInput )(lua_State * L )
255+ void THNN_ (SpatialConvolutionMM_updateGradInput )(THNNState * state , THTensor * input , THTensor * gradOutput , THTensor * gradInput , THTensor * weight , THTensor * bias , THTensor * finput , THTensor * fgradInput , int kW , int kH , int dW , int dH , int padW , int padH )
138256{
139- THTensor * input = luaT_checkudata (L , 2 , torch_Tensor );
140- THTensor * gradOutput = luaT_checkudata (L , 3 , torch_Tensor );
141- int kW = luaT_getfieldcheckint (L , 1 , "kW" );
142- int kH = luaT_getfieldcheckint (L , 1 , "kH" );
143- int dW = luaT_getfieldcheckint (L , 1 , "dW" );
144- int dH = luaT_getfieldcheckint (L , 1 , "dH" );
145- int padW = luaT_getfieldcheckint (L , 1 , "padW" );
146- int padH = luaT_getfieldcheckint (L , 1 , "padH" );
147- int nOutputPlane = luaT_getfieldcheckint (L , 1 , "nOutputPlane" );
148-
149- THTensor * finput = luaT_getfieldcheckudata (L , 1 , "finput" , torch_Tensor );
150- THTensor * fgradInput = luaT_getfieldcheckudata (L , 1 , "fgradInput" , torch_Tensor );
151- THTensor * weight = luaT_getfieldcheckudata (L , 1 , "weight" , torch_Tensor );
152- THTensor * gradInput = luaT_getfieldcheckudata (L , 1 , "gradInput" , torch_Tensor );
257+ long nOutputPlane = weight -> size [0 ];
153258
154259 THArgCheck ( nOutputPlane == gradOutput -> size [input -> nDimension == 4 ? 1 : 0 ], 1 , "Number of output features is not equal to nOutputPlane" );
155260
@@ -159,7 +264,7 @@ static int nn_(SpatialConvolutionMM_updateGradInput)(lua_State *L)
159264
160265 if (input -> nDimension == 3 )
161266 {
162- nn_ (SpatialConvolutionMM_updateGradInput_frame )(gradInput , gradOutput , weight , fgradInput , kW , kH , dW , dH , padW , padH );
267+ THNN_ (SpatialConvolutionMM_updateGradInput_frame )(gradInput , gradOutput , weight , fgradInput , kW , kH , dW , dH , padW , padH );
163268 }
164269 else
165270 {
@@ -173,7 +278,7 @@ static int nn_(SpatialConvolutionMM_updateGradInput)(lua_State *L)
173278 THTensor * gradOutput_t = THTensor_ (newSelect )(gradOutput , 0 , t );
174279 THTensor * fgradInput_t = THTensor_ (newSelect )(fgradInput , 0 , t );
175280
176- nn_ (SpatialConvolutionMM_updateGradInput_frame )(gradInput_t , gradOutput_t , weight , fgradInput_t , kW , kH , dW , dH , padW , padH );
281+ THNN_ (SpatialConvolutionMM_updateGradInput_frame )(gradInput_t , gradOutput_t , weight , fgradInput_t , kW , kH , dW , dH , padW , padH );
177282
178283 THTensor_ (free )(gradInput_t );
179284 THTensor_ (free )(gradOutput_t );
@@ -182,11 +287,9 @@ static int nn_(SpatialConvolutionMM_updateGradInput)(lua_State *L)
182287 }
183288
184289 THTensor_ (transpose )(weight , weight , 0 , 1 );
185-
186- return 1 ;
187290}
188291
189- static void nn_ (SpatialConvolutionMM_accGradParameters_frame )(THTensor * gradOutput , THTensor * gradWeight , THTensor * gradBias , THTensor * finput ,
292+ static void THNN_ (SpatialConvolutionMM_accGradParameters_frame )(THTensor * gradOutput , THTensor * gradWeight , THTensor * gradBias , THTensor * finput ,
190293 real scale )
191294{
192295 long i ;
@@ -211,22 +314,14 @@ static void nn_(SpatialConvolutionMM_accGradParameters_frame)(THTensor *gradOutp
211314 THTensor_ (free )(gradOutput2d );
212315}
213316
214- static int nn_ (SpatialConvolutionMM_accGradParameters )(lua_State * L )
317+ void THNN_ (SpatialConvolutionMM_accGradParameters )(THNNState * state , THTensor * input , THTensor * gradOutput , THTensor * gradWeight , THTensor * gradBias , THTensor * finput , real scale )
215318{
216- THTensor * input = luaT_checkudata (L , 2 , torch_Tensor );
217- THTensor * gradOutput = luaT_checkudata (L , 3 , torch_Tensor );
218- real scale = luaL_optnumber (L , 4 , 1 );
219- int nOutputPlane = luaT_getfieldcheckint (L , 1 , "nOutputPlane" );
220-
221- THTensor * finput = luaT_getfieldcheckudata (L , 1 , "finput" , torch_Tensor );
222- THTensor * gradWeight = luaT_getfieldcheckudata (L , 1 , "gradWeight" , torch_Tensor );
223- THTensor * gradBias = luaT_getfieldcheckudata (L , 1 , "gradBias" , torch_Tensor );
224-
319+ long nOutputPlane = gradWeight -> size [0 ];
225320 THArgCheck ( nOutputPlane == gradOutput -> size [input -> nDimension == 4 ? 1 : 0 ], 1 , "Number of output features is not equal to nOutputPlane" );
226321
227322 if (input -> nDimension == 3 )
228323 {
229- nn_ (SpatialConvolutionMM_accGradParameters_frame )(gradOutput , gradWeight , gradBias , finput , scale );
324+ THNN_ (SpatialConvolutionMM_accGradParameters_frame )(gradOutput , gradWeight , gradBias , finput , scale );
230325 }
231326 else
232327 {
@@ -238,28 +333,12 @@ static int nn_(SpatialConvolutionMM_accGradParameters)(lua_State *L)
238333 THTensor * gradOutput_t = THTensor_ (newSelect )(gradOutput , 0 , t );
239334 THTensor * finput_t = THTensor_ (newSelect )(finput , 0 , t );
240335
241- nn_ (SpatialConvolutionMM_accGradParameters_frame )(gradOutput_t , gradWeight , gradBias , finput_t , scale );
336+ THNN_ (SpatialConvolutionMM_accGradParameters_frame )(gradOutput_t , gradWeight , gradBias , finput_t , scale );
242337
243338 THTensor_ (free )(gradOutput_t );
244339 THTensor_ (free )(finput_t );
245340 }
246341 }
247-
248- return 0 ;
249- }
250-
251- static const struct luaL_Reg nn_ (SpatialConvolutionMM__ ) [] = {
252- {"SpatialConvolutionMM_updateOutput" , nn_ (SpatialConvolutionMM_updateOutput )},
253- {"SpatialConvolutionMM_updateGradInput" , nn_ (SpatialConvolutionMM_updateGradInput )},
254- {"SpatialConvolutionMM_accGradParameters" , nn_ (SpatialConvolutionMM_accGradParameters )},
255- {NULL , NULL }
256- };
257-
258- static void nn_ (SpatialConvolutionMM_init )(lua_State * L )
259- {
260- luaT_pushmetatable (L , torch_Tensor );
261- luaT_registeratname (L , nn_ (SpatialConvolutionMM__ ), "nn" );
262- lua_pop (L ,1 );
263342}
264343
265344#endif
0 commit comments