Skip to content

Commit 2c4fdea

Browse files
authored
update rcnn (wang-xinyu#521)
* add MaskRcnn(C4) * add MaskRcnnInference plugin for mask selecting * split ROIHeads to BOXHead and MaskHead * remove unuseful parameters in createEngine_rcnn and BuildRcnnModel * change the type of scores_h, boxes_h and classes_h from unique_ptr to vector * add doInference * add maskrcnn postprocess * update README.md * update rcnn * fix bugs with R18 and R34 add BasicBlock for R18 and R34 add STRIDE_IN_1X1, MakeStage is same with detectron2 now. * update README.md * update rcnn replace picture with url
1 parent f4c384d commit 2c4fdea

File tree

3 files changed

+102
-29
lines changed

3 files changed

+102
-29
lines changed

rcnn/README.md

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ TensorRT7.2 is recommended because Resize layer in 7.0 with kLINEAR mode is a lit
2626
// go to facebookresearch/detectron2
2727
python setup.py build develop // more install information see https://github.com/facebookresearch/detectron2/blob/master/INSTALL.md
2828
// download https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_C4_1x/137257644/model_final_721ade.pkl
29-
// copy tensorrtx/rcnn/(gen_wts.py,demo.jpg) into facebookresearch/detectron2
29+
// download https://raw.githubusercontent.com/freedenS/TestImage/main/demo.jpg
30+
// copy tensorrtx/rcnn/gen_wts.py and demo.jpg into facebookresearch/detectron2
3031
// ensure cfg.MODEL.WEIGHTS in gen_wts.py is correct
3132
// go to facebookresearch/detectron2
3233
python gen_wts.py
@@ -52,33 +53,39 @@ sudo ./rcnn -d faster.engine ../samples
5253
// sudo ./rcnn -d mask.engine ../samples m
5354
```
5455

55-
3. check the images generated, as follows. _zidane.jpg and _bus.jpg
56+
3. check the images generated, as follows. _demo.jpg and so on.
5657

5758
## Backbone
5859

5960
#### R18, R34, R152
6061

6162
```
63+
// python
6264
1.download pretrained model
6365
R18: https://download.pytorch.org/models/resnet18-f37072fd.pth
6466
R34: https://download.pytorch.org/models/resnet34-b627a593.pth
67+
R50: https://download.pytorch.org/models/resnet50-0676ba61.pth
68+
R101: https://download.pytorch.org/models/resnet101-63fe2227.pth
6569
R152: https://download.pytorch.org/models/resnet152-394f9c45.pth
6670
2.convert pth to pkl by facebookresearch/detectron2/tools/convert-torchvision-to-d2.py
6771
3.set merge_from_file in gen_wts.py
6872
./configs/COCO-Detections/faster_rcnn_R_50_C4_1x.yaml for fasterRcnn
6973
./configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml for maskRcnn
70-
4.set cfg.MODEL.RESNETS.DEPTH = 18(34,152),
74+
4.set cfg.MODEL.RESNETS.DEPTH = 18(34,50,101,152),
7175
cfg.MODEL.RESNETS.STRIDE_IN_1X1 = False,
72-
cfg.MODEL.RESNETS.RES2_OUT_CHANNELS = 64, // for R18, R34
76+
cfg.MODEL.RESNETS.RES2_OUT_CHANNELS = 64, // for R18, R34; 256 for others
7377
cfg.MODEL.PIXEL_MEAN = [123.675, 116.280, 103.530],
7478
cfg.MODEL.PIXEL_STD = [58.395, 57.120, 57.375],
7579
cfg.INPUT.FORMAT = "RGB"
7680
and then train your own model
77-
5.set BACKBONE_RESNETTYPE = R18(R34, R152) in rcnn.cpp line 13
78-
6.modify PIXEL_MEAN and PIXEL_STD in rcnn.cpp
79-
7.set res2_out_channels=64 in BuildResNet in rcnn.cpp line 239 // for R18, R34
80-
8.generate wts file from your own model and build your engine, refer to how to run
81-
9.convert your image to RGB before inference
81+
5.generate your wts file.
82+
// c++
83+
6.set BACKBONE_RESNETTYPE = R18(R34,R50,R101,R152) in rcnn.cpp line 14
84+
7.modify PIXEL_MEAN and PIXEL_STD in rcnn.cpp
85+
8.set STRIDE_IN_1X1=false in backbone.hpp line 9
86+
9.set other parameters if it's not same with default
87+
10.build your engine, refer to how to run
88+
11.convert your image to RGB before inference
8289
```
8390

8491
#### R50, R101
@@ -95,7 +102,8 @@ sudo ./rcnn -d faster.engine ../samples
95102
R50-mask: ./configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml
96103
R101-mask: ./configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml
97104
3.set BACKBONE_RESNETTYPE = R50(R101) rcnn.cpp line 13
98-
4.follow how to run
105+
4.set STRIDE_IN_1X1=true in backbone.hpp
106+
5.follow how to run
99107
```
100108

101109
## NOTE
@@ -114,7 +122,7 @@ sudo ./rcnn -d faster.engine ../samples
114122

115123
- if you want to use maskrcnn with cuda10.2, please be sure that you have upgraded cuda to the latest patch. see https://github.com/NVIDIA/TensorRT/issues/1151 for detail.
116124

117-
- you can only build fasterRcnn part with maskRcnn weights file.
125+
- you can build fasterRcnn with a maskRcnn weights file.
118126

119127
## Quantization
120128

rcnn/backbone.hpp

Lines changed: 69 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44
#include <string>
55
#include "common.hpp"
66

7+
/* when stride>1, whether to put the stride in the first 1x1 convolution or the bottleneck 3x3 convolution.
8+
set false when using a backbone from torchvision*/
9+
#define STRIDE_IN_1X1 true
10+
711
enum RESNETTYPE {
812
R18 = 0,
913
R34,
@@ -44,6 +48,55 @@ int group_num = 1) {
4448
return max_pool2d;
4549
}
4650

51+
ITensor* BasicBlock(INetworkDefinition *network,
52+
std::map<std::string, Weights>& weightMap,
53+
const std::string& lname,
54+
ITensor& input,
55+
int in_channels,
56+
int out_channels,
57+
int stride = 1) {
58+
// conv1
59+
IConvolutionLayer* conv1 = network->addConvolutionNd(input, out_channels, DimsHW{ 3, 3 },
60+
weightMap[lname + ".conv1.weight"],
61+
weightMap[lname + ".conv1.bias"]);
62+
assert(conv1);
63+
conv1->setStrideNd(DimsHW{ stride, stride });
64+
conv1->setPaddingNd(DimsHW{ 1, 1 });
65+
66+
auto r1 = network->addActivation(*conv1->getOutput(0), ActivationType::kRELU);
67+
assert(r1);
68+
69+
// conv2
70+
IConvolutionLayer* conv2 = network->addConvolutionNd(*r1->getOutput(0), out_channels, DimsHW{ 3, 3 },
71+
weightMap[lname + ".conv2.weight"],
72+
weightMap[lname + ".conv2.bias"]);
73+
assert(conv2);
74+
conv2->setStrideNd(DimsHW{ 1, 1 });
75+
conv2->setPaddingNd(DimsHW{ 1, 1 });
76+
77+
// shortcut
78+
ITensor* shortcut_value = nullptr;
79+
if (in_channels != out_channels) {
80+
auto shortcut = network->addConvolutionNd(input, out_channels, DimsHW{ 1, 1 },
81+
weightMap[lname + ".shortcut.weight"],
82+
weightMap[lname + ".shortcut.bias"]);
83+
assert(shortcut);
84+
shortcut->setStrideNd(DimsHW{ stride, stride });
85+
shortcut_value = shortcut->getOutput(0);
86+
} else {
87+
shortcut_value = &input;
88+
}
89+
90+
// add
91+
auto ew = network->addElementWise(*conv2->getOutput(0), *shortcut_value, ElementWiseOperation::kSUM);
92+
assert(ew);
93+
94+
auto r3 = network->addActivation(*ew->getOutput(0), ActivationType::kRELU);
95+
assert(r3);
96+
97+
return r3->getOutput(0);
98+
}
99+
47100
ITensor* BottleneckBlock(INetworkDefinition *network,
48101
std::map<std::string, Weights>& weightMap,
49102
const std::string& lname,
@@ -54,12 +107,14 @@ int out_channels,
54107
int stride = 1,
55108
int dilation = 1,
56109
int group_num = 1) {
110+
int stride_1x1 = STRIDE_IN_1X1 ? stride : 1;
111+
int stride_3x3 = STRIDE_IN_1X1 ? 1 : stride;
57112
// conv1
58113
IConvolutionLayer* conv1 = network->addConvolutionNd(input, bottleneck_channels, DimsHW{ 1, 1 },
59114
weightMap[lname + ".conv1.weight"],
60115
weightMap[lname + ".conv1.bias"]);
61116
assert(conv1);
62-
conv1->setStrideNd(DimsHW{ stride, stride });
117+
conv1->setStrideNd(DimsHW{ stride_1x1, stride_1x1 });
63118
conv1->setNbGroups(group_num);
64119

65120
auto r1 = network->addActivation(*conv1->getOutput(0), ActivationType::kRELU);
@@ -70,7 +125,7 @@ int group_num = 1) {
70125
weightMap[lname + ".conv2.weight"],
71126
weightMap[lname + ".conv2.bias"]);
72127
assert(conv2);
73-
conv2->setStrideNd(DimsHW{ 1, 1 });
128+
conv2->setStrideNd(DimsHW{ stride_3x3, stride_3x3 });
74129
conv2->setPaddingNd(DimsHW{ 1 * dilation, 1 * dilation });
75130
conv2->setDilationNd(DimsHW{ dilation, dilation });
76131
conv2->setNbGroups(group_num);
@@ -115,21 +170,23 @@ std::map<std::string, Weights>& weightMap,
115170
const std::string& lname,
116171
ITensor& input,
117172
int stage,
173+
RESNETTYPE resnet_type,
118174
int in_channels,
119175
int bottleneck_channels,
120176
int out_channels,
121177
int first_stride = 1,
122178
int dilation = 1) {
123179
ITensor* out = &input;
124180
for (int i = 0; i < stage; i++) {
125-
if (i == 0)
126-
out = BottleneckBlock(network, weightMap,
127-
lname + "." + std::to_string(i), *out, in_channels,
128-
bottleneck_channels, out_channels, first_stride, dilation);
181+
std::string layerName = lname + "." + std::to_string(i);
182+
int stride = i == 0 ? first_stride : 1;
183+
184+
if (resnet_type == R18 || resnet_type == R34)
185+
out = BasicBlock(network, weightMap, layerName, *out, in_channels, out_channels, stride);
129186
else
130-
out = BottleneckBlock(network, weightMap,
131-
lname + "." + std::to_string(i), *out, in_channels,
132-
bottleneck_channels, out_channels, 1, dilation);
187+
out = BottleneckBlock(network, weightMap, layerName, *out,
188+
in_channels, bottleneck_channels, out_channels, stride, dilation);
189+
133190
in_channels = out_channels;
134191
}
135192
return out;
@@ -161,8 +218,9 @@ int res5_dilation = 1) {
161218
int first_stride = (i == 0 || (i == 3 && dilation == 2)) ? 1 : 2;
162219
out = MakeStage(network, weightMap,
163220
"backbone.res" + std::to_string(i + 2), *out,
164-
num_blocks_per_stage.at(resnet_type)[i], stem_out_channels,
165-
bottleneck_channels, out_channels, first_stride, dilation);
221+
num_blocks_per_stage.at(resnet_type)[i], resnet_type,
222+
stem_out_channels, bottleneck_channels, out_channels,
223+
first_stride, dilation);
166224
stem_out_channels = out_channels;
167225
bottleneck_channels *= 2;
168226
out_channels *= 2;

rcnn/rcnn.cpp

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ static constexpr int INPUT_H = 480;
2222
static constexpr int INPUT_W = 640;
2323
static int IMAGE_HEIGHT = 800;
2424
static int IMAGE_WIDTH = 1333;
25+
// backbone
26+
static const int RES2_OUT_CHANNELS = (BACKBONE_RESNETTYPE == R18 ||
27+
BACKBONE_RESNETTYPE == R34) ? 64 : 256;
2528
// rpn
2629
static const std::vector<float> ANCHOR_SIZES = { 32, 64, 128, 256, 512 };
2730
static const std::vector<float> ASPECT_RATIOS = { 0.5, 1.0, 2.0 };
@@ -132,14 +135,13 @@ void calculateRatio() {
132135
}
133136

134137
ITensor* RPN(INetworkDefinition *network,
135-
std::map<std::string, Weights>& weightMap, ITensor& features,
136-
int out_channels = 256) {
138+
std::map<std::string, Weights>& weightMap, ITensor& features) {
137139
int num_anchors = ANCHOR_SIZES.size() * ASPECT_RATIOS.size();
138140
int box_dim = 4;
139141

140142
// rpn head conv
141-
auto rpn_head_conv = network->addConvolutionNd(features, out_channels,
142-
DimsHW{ 3, 3 }, weightMap["proposal_generator.rpn_head.conv.weight"],
143+
auto rpn_head_conv = network->addConvolutionNd(features, features.getDimensions().d[0], DimsHW{ 3, 3 },
144+
weightMap["proposal_generator.rpn_head.conv.weight"],
143145
weightMap["proposal_generator.rpn_head.conv.bias"]);
144146
assert(rpn_head_conv);
145147
rpn_head_conv->setStrideNd(DimsHW{ 1, 1 });
@@ -185,8 +187,13 @@ ITensor* proposals, ITensor* features, int num_proposals) {
185187
auto roiAlignLayer = network->addPluginV2(roi_inputs.data(), roi_inputs.size(), roiAlignPlugin);
186188

187189
// res5
190+
/* same with https://github.com/facebookresearch/detectron2/
191+
blob/9246ebc3af1c023cfbdae77e5d976edbcf9a2933/detectron2/modeling/roi_heads/roi_heads.py#L430,
192+
use bottleneck here, so pass R50*/
188193
auto box_features = MakeStage(network, weightMap, "roi_heads.res5",
189-
*roiAlignLayer->getOutput(0), 3, 1024, 512, 2048, 2);
194+
*roiAlignLayer->getOutput(0), 3, R50,
195+
roiAlignLayer->getOutput(0)->getDimensions().d[1],
196+
512, RES2_OUT_CHANNELS * 8, 2);
190197
return box_features;
191198
}
192199

@@ -293,9 +300,9 @@ ICudaEngine* createEngine_rcnn(unsigned int maxBatchSize,
293300
loadWeights(wtsfile, weightMap);
294301

295302
// backbone
296-
ITensor* features = BuildResNet(network, weightMap, *data, BACKBONE_RESNETTYPE, 64, 64, 256);
303+
ITensor* features = BuildResNet(network, weightMap, *data, BACKBONE_RESNETTYPE, 64, 64, RES2_OUT_CHANNELS);
297304

298-
auto proposals = RPN(network, weightMap, *features, 1024);
305+
auto proposals = RPN(network, weightMap, *features);
299306
auto results = ROIHeads(network, weightMap, proposals, features);
300307

301308
// build output

0 commit comments

Comments
 (0)