 #include <string>
 #include "common.hpp"

+/* When stride > 1, controls whether the stride is applied in the first 1x1 convolution
+   or in the bottleneck 3x3 convolution. Set to false when using a backbone from torchvision. */
+#define STRIDE_IN_1X1 true
+
 enum RESNETTYPE {
     R18 = 0,
     R34,
@@ -44,6 +48,55 @@ int group_num = 1) {
     return max_pool2d;
 }

+ITensor* BasicBlock(INetworkDefinition *network,
+    std::map<std::string, Weights>& weightMap,
+    const std::string& lname,
+    ITensor& input,
+    int in_channels,
+    int out_channels,
+    int stride = 1) {
+    // conv1
+    IConvolutionLayer* conv1 = network->addConvolutionNd(input, out_channels, DimsHW{ 3, 3 },
+        weightMap[lname + ".conv1.weight"],
+        weightMap[lname + ".conv1.bias"]);
+    assert(conv1);
+    conv1->setStrideNd(DimsHW{ stride, stride });
+    conv1->setPaddingNd(DimsHW{ 1, 1 });
+
+    auto r1 = network->addActivation(*conv1->getOutput(0), ActivationType::kRELU);
+    assert(r1);
+
+    // conv2
+    IConvolutionLayer* conv2 = network->addConvolutionNd(*r1->getOutput(0), out_channels, DimsHW{ 3, 3 },
+        weightMap[lname + ".conv2.weight"],
+        weightMap[lname + ".conv2.bias"]);
+    assert(conv2);
+    conv2->setStrideNd(DimsHW{ 1, 1 });
+    conv2->setPaddingNd(DimsHW{ 1, 1 });
+
+    // shortcut
+    ITensor* shortcut_value = nullptr;
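+    // If the channel count changes, project the input with a 1x1 convolution so the residual add shapes match.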
+    if (in_channels != out_channels) {
+        auto shortcut = network->addConvolutionNd(input, out_channels, DimsHW{ 1, 1 },
+            weightMap[lname + ".shortcut.weight"],
+            weightMap[lname + ".shortcut.bias"]);
+        assert(shortcut);
+        shortcut->setStrideNd(DimsHW{ stride, stride });
+        shortcut_value = shortcut->getOutput(0);
+    } else {
+        shortcut_value = &input;
+    }
+
+    // add
+    auto ew = network->addElementWise(*conv2->getOutput(0), *shortcut_value, ElementWiseOperation::kSUM);
+    assert(ew);
+
+    auto r3 = network->addActivation(*ew->getOutput(0), ActivationType::kRELU);
+    assert(r3);
+
+    return r3->getOutput(0);
+}
+
 ITensor* BottleneckBlock(INetworkDefinition *network,
     std::map<std::string, Weights>& weightMap,
     const std::string& lname,
@@ -54,12 +107,14 @@ int out_channels,
     int stride = 1,
     int dilation = 1,
     int group_num = 1) {
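+    // STRIDE_IN_1X1 selects whether the stride goes on the first 1x1 convolution or on the 3x3 convolution (see the comment at the top of the file).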
+    int stride_1x1 = STRIDE_IN_1X1 ? stride : 1;
+    int stride_3x3 = STRIDE_IN_1X1 ? 1 : stride;
     // conv1
     IConvolutionLayer* conv1 = network->addConvolutionNd(input, bottleneck_channels, DimsHW{ 1, 1 },
         weightMap[lname + ".conv1.weight"],
         weightMap[lname + ".conv1.bias"]);
     assert(conv1);
-    conv1->setStrideNd(DimsHW{ stride, stride });
+    conv1->setStrideNd(DimsHW{ stride_1x1, stride_1x1 });
     conv1->setNbGroups(group_num);

     auto r1 = network->addActivation(*conv1->getOutput(0), ActivationType::kRELU);
@@ -70,7 +125,7 @@ int group_num = 1) {
         weightMap[lname + ".conv2.weight"],
         weightMap[lname + ".conv2.bias"]);
     assert(conv2);
-    conv2->setStrideNd(DimsHW{ 1, 1 });
+    conv2->setStrideNd(DimsHW{ stride_3x3, stride_3x3 });
     conv2->setPaddingNd(DimsHW{ 1 * dilation, 1 * dilation });
     conv2->setDilationNd(DimsHW{ dilation, dilation });
     conv2->setNbGroups(group_num);
@@ -115,21 +170,23 @@ std::map<std::string, Weights>& weightMap,
     const std::string& lname,
     ITensor& input,
     int stage,
+    RESNETTYPE resnet_type,
     int in_channels,
     int bottleneck_channels,
     int out_channels,
     int first_stride = 1,
     int dilation = 1) {
     ITensor* out = &input;
     for (int i = 0; i < stage; i++) {
-        if (i == 0)
-            out = BottleneckBlock(network, weightMap,
-                lname + "." + std::to_string(i), *out, in_channels,
-                bottleneck_channels, out_channels, first_stride, dilation);
+        std::string layerName = lname + "." + std::to_string(i);
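+        // Only the first block of a stage applies first_stride; the remaining blocks use stride 1.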
+        int stride = i == 0 ? first_stride : 1;
+
+        if (resnet_type == R18 || resnet_type == R34)
+            out = BasicBlock(network, weightMap, layerName, *out, in_channels, out_channels, stride);
         else
-            out = BottleneckBlock(network, weightMap,
-                lname + "." + std::to_string(i), *out, in_channels,
-                bottleneck_channels, out_channels, 1, dilation);
+            out = BottleneckBlock(network, weightMap, layerName, *out,
+                in_channels, bottleneck_channels, out_channels, stride, dilation);
+
         in_channels = out_channels;
     }
     return out;
@@ -161,8 +218,9 @@ int res5_dilation = 1) {
         int first_stride = (i == 0 || (i == 3 && dilation == 2)) ? 1 : 2;
         out = MakeStage(network, weightMap,
             "backbone.res" + std::to_string(i + 2), *out,
-            num_blocks_per_stage.at(resnet_type)[i], stem_out_channels,
-            bottleneck_channels, out_channels, first_stride, dilation);
+            num_blocks_per_stage.at(resnet_type)[i], resnet_type,
+            stem_out_channels, bottleneck_channels, out_channels,
+            first_stride, dilation);
         stem_out_channels = out_channels;
         bottleneck_channels *= 2;
         out_channels *= 2;