66#include " common.hpp"
77#include " utils.h"
88#include " calibrator.h"
9-
10- #define USE_FP16 // set USE_INT8 or USE_FP16 or USE_FP32
9+ # include < typeinfo >
10+ #define USE_FP32 // set USE_INT8 or USE_FP16 or USE_FP32
1111#define DEVICE 0 // GPU id
1212#define NMS_THRESH 0.4
1313#define CONF_THRESH 0.5
@@ -35,30 +35,29 @@ static int get_depth(int x, float gd) {
3535 return std::max<int >(r, 1 );
3636}
3737
38+
39+
3840ICudaEngine* build_engine (unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt, float & gd, float & gw, std::string& wts_name) {
3941 INetworkDefinition* network = builder->createNetworkV2 (0U );
4042
4143 // Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
4244 ITensor* data = network->addInput (INPUT_BLOB_NAME, dt, Dims3{ 3 , INPUT_H, INPUT_W });
4345 assert (data);
44-
4546 std::map<std::string, Weights> weightMap = loadWeights (wts_name);
46-
4747 /* ------ yolov5 backbone------ */
48- auto focus0 = focus (network, weightMap, *data, 3 , get_width (64 , gw), 3 , " model.0" );
49- auto conv1 = convBlock (network, weightMap, *focus0->getOutput (0 ), get_width (128 , gw), 3 , 2 , 1 , " model.1" );
48+ auto conv0 = convBlock (network, weightMap, *data, get_width (64 , gw), 6 , 2 , 1 , " model.0" );
49+ assert (conv0);
50+ auto conv1 = convBlock (network, weightMap, *conv0->getOutput (0 ), get_width (128 , gw), 3 , 2 , 1 , " model.1" );
5051 auto bottleneck_CSP2 = C3 (network, weightMap, *conv1->getOutput (0 ), get_width (128 , gw), get_width (128 , gw), get_depth (3 , gd), true , 1 , 0.5 , " model.2" );
5152 auto conv3 = convBlock (network, weightMap, *bottleneck_CSP2->getOutput (0 ), get_width (256 , gw), 3 , 2 , 1 , " model.3" );
52- auto bottleneck_csp4 = C3 (network, weightMap, *conv3->getOutput (0 ), get_width (256 , gw), get_width (256 , gw), get_depth (9 , gd), true , 1 , 0.5 , " model.4" );
53+ auto bottleneck_csp4 = C3 (network, weightMap, *conv3->getOutput (0 ), get_width (256 , gw), get_width (256 , gw), get_depth (6 , gd), true , 1 , 0.5 , " model.4" );
5354 auto conv5 = convBlock (network, weightMap, *bottleneck_csp4->getOutput (0 ), get_width (512 , gw), 3 , 2 , 1 , " model.5" );
5455 auto bottleneck_csp6 = C3 (network, weightMap, *conv5->getOutput (0 ), get_width (512 , gw), get_width (512 , gw), get_depth (9 , gd), true , 1 , 0.5 , " model.6" );
5556 auto conv7 = convBlock (network, weightMap, *bottleneck_csp6->getOutput (0 ), get_width (1024 , gw), 3 , 2 , 1 , " model.7" );
56- auto spp8 = SPP (network, weightMap, *conv7->getOutput (0 ), get_width (1024 , gw), get_width (1024 , gw), 5 , 9 , 13 , " model.8" );
57-
57+ auto bottleneck_csp8 = C3 (network, weightMap, *conv7->getOutput (0 ), get_width (1024 , gw), get_width (1024 , gw), get_depth ( 3 , gd), false , 1 , 0.5 , " model.8" );
58+ auto spp9 = SPPF (network, weightMap, *bottleneck_csp8-> getOutput ( 0 ), get_width ( 1024 , gw), get_width ( 1024 , gw), 5 , " model.9 " );
5859 /* ------ yolov5 head ------ */
59- auto bottleneck_csp9 = C3 (network, weightMap, *spp8->getOutput (0 ), get_width (1024 , gw), get_width (1024 , gw), get_depth (3 , gd), false , 1 , 0.5 , " model.9" );
60- auto conv10 = convBlock (network, weightMap, *bottleneck_csp9->getOutput (0 ), get_width (512 , gw), 1 , 1 , 1 , " model.10" );
61-
60+ auto conv10 = convBlock (network, weightMap, *spp9->getOutput (0 ), get_width (512 , gw), 1 , 1 , 1 , " model.10" );
6261 auto upsample11 = network->addResize (*conv10->getOutput (0 ));
6362 assert (upsample11);
6463 upsample11->setResizeMode (ResizeMode::kNEAREST );
@@ -76,9 +75,7 @@ ICudaEngine* build_engine(unsigned int maxBatchSize, IBuilder* builder, IBuilder
7675
7776 ITensor* inputTensors16[] = { upsample15->getOutput (0 ), bottleneck_csp4->getOutput (0 ) };
7877 auto cat16 = network->addConcatenation (inputTensors16, 2 );
79-
8078 auto bottleneck_csp17 = C3 (network, weightMap, *cat16->getOutput (0 ), get_width (512 , gw), get_width (256 , gw), get_depth (3 , gd), false , 1 , 0.5 , " model.17" );
81-
8279 /* ------ detect ------ */
8380 IConvolutionLayer* det0 = network->addConvolutionNd (*bottleneck_csp17->getOutput (0 ), 3 * (Yolo::CLASS_NUM + 5 ), DimsHW{ 1 , 1 }, weightMap[" model.24.m.0.weight" ], weightMap[" model.24.m.0.bias" ]);
8481 auto conv18 = convBlock (network, weightMap, *bottleneck_csp17->getOutput (0 ), get_width (256 , gw), 3 , 2 , 1 , " model.18" );
@@ -91,11 +88,9 @@ ICudaEngine* build_engine(unsigned int maxBatchSize, IBuilder* builder, IBuilder
9188 auto cat22 = network->addConcatenation (inputTensors22, 2 );
9289 auto bottleneck_csp23 = C3 (network, weightMap, *cat22->getOutput (0 ), get_width (1024 , gw), get_width (1024 , gw), get_depth (3 , gd), false , 1 , 0.5 , " model.23" );
9390 IConvolutionLayer* det2 = network->addConvolutionNd (*bottleneck_csp23->getOutput (0 ), 3 * (Yolo::CLASS_NUM + 5 ), DimsHW{ 1 , 1 }, weightMap[" model.24.m.2.weight" ], weightMap[" model.24.m.2.bias" ]);
94-
9591 auto yolo = addYoLoLayer (network, weightMap, " model.24" , std::vector<IConvolutionLayer*>{det0, det1, det2});
9692 yolo->getOutput (0 )->setName (OUTPUT_BLOB_NAME);
9793 network->markOutput (*yolo->getOutput (0 ));
98-
9994 // Build engine
10095 builder->setMaxBatchSize (maxBatchSize);
10196 config->setMaxWorkspaceSize (16 * (1 << 20 )); // 16MB
@@ -124,40 +119,35 @@ ICudaEngine* build_engine(unsigned int maxBatchSize, IBuilder* builder, IBuilder
124119
125120 return engine;
126121}
127-
122+ // v6.0
128123ICudaEngine* build_engine_p6 (unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt, float & gd, float & gw, std::string& wts_name) {
129124 INetworkDefinition* network = builder->createNetworkV2 (0U );
130-
131125 // Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
132126 ITensor* data = network->addInput (INPUT_BLOB_NAME, dt, Dims3{ 3 , INPUT_H, INPUT_W });
133127 assert (data);
134-
135128 std::map<std::string, Weights> weightMap = loadWeights (wts_name);
136-
137129 /* ------ yolov5 backbone------ */
138- auto focus0 = focus (network, weightMap, *data, 3 , get_width (64 , gw), 3 , " model.0" );
139- auto conv1 = convBlock (network, weightMap, *focus0 ->getOutput (0 ), get_width (128 , gw), 3 , 2 , 1 , " model.1" );
130+ auto conv0 = convBlock (network, weightMap, *data, get_width (64 , gw), 6 , 2 , 1 , " model.0" );
131+ auto conv1 = convBlock (network, weightMap, *conv0 ->getOutput (0 ), get_width (128 , gw), 3 , 2 , 1 , " model.1" );
140132 auto c3_2 = C3 (network, weightMap, *conv1->getOutput (0 ), get_width (128 , gw), get_width (128 , gw), get_depth (3 , gd), true , 1 , 0.5 , " model.2" );
141133 auto conv3 = convBlock (network, weightMap, *c3_2->getOutput (0 ), get_width (256 , gw), 3 , 2 , 1 , " model.3" );
142- auto c3_4 = C3 (network, weightMap, *conv3->getOutput (0 ), get_width (256 , gw), get_width (256 , gw), get_depth (9 , gd), true , 1 , 0.5 , " model.4" );
134+ auto c3_4 = C3 (network, weightMap, *conv3->getOutput (0 ), get_width (256 , gw), get_width (256 , gw), get_depth (6 , gd), true , 1 , 0.5 , " model.4" );
143135 auto conv5 = convBlock (network, weightMap, *c3_4->getOutput (0 ), get_width (512 , gw), 3 , 2 , 1 , " model.5" );
144136 auto c3_6 = C3 (network, weightMap, *conv5->getOutput (0 ), get_width (512 , gw), get_width (512 , gw), get_depth (9 , gd), true , 1 , 0.5 , " model.6" );
145137 auto conv7 = convBlock (network, weightMap, *c3_6->getOutput (0 ), get_width (768 , gw), 3 , 2 , 1 , " model.7" );
146138 auto c3_8 = C3 (network, weightMap, *conv7->getOutput (0 ), get_width (768 , gw), get_width (768 , gw), get_depth (3 , gd), true , 1 , 0.5 , " model.8" );
147139 auto conv9 = convBlock (network, weightMap, *c3_8->getOutput (0 ), get_width (1024 , gw), 3 , 2 , 1 , " model.9" );
148- auto spp10 = SPP (network, weightMap, *conv9->getOutput (0 ), get_width (1024 , gw), get_width (1024 , gw), 3 , 5 , 7 , " model.10" );
149- auto c3_11 = C3 (network, weightMap, *spp10->getOutput (0 ), get_width (1024 , gw), get_width (1024 , gw), get_depth (3 , gd), false , 1 , 0.5 , " model.11" );
150-
140+ auto c3_10 = C3 (network, weightMap, *conv9->getOutput (0 ), get_width (1024 , gw), get_width (1024 , gw), get_depth (3 , gd), false , 1 , 0.5 , " model.10" );
141+ auto sppf11 = SPPF (network, weightMap, *c3_10->getOutput (0 ), get_width (1024 , gw), get_width (1024 , gw), 5 , " model.11" );
151142 /* ------ yolov5 head ------ */
152- auto conv12 = convBlock (network, weightMap, *c3_11 ->getOutput (0 ), get_width (768 , gw), 1 , 1 , 1 , " model.12" );
143+ auto conv12 = convBlock (network, weightMap, *sppf11 ->getOutput (0 ), get_width (768 , gw), 1 , 1 , 1 , " model.12" );
153144 auto upsample13 = network->addResize (*conv12->getOutput (0 ));
154145 assert (upsample13);
155146 upsample13->setResizeMode (ResizeMode::kNEAREST );
156147 upsample13->setOutputDimensions (c3_8->getOutput (0 )->getDimensions ());
157148 ITensor* inputTensors14[] = { upsample13->getOutput (0 ), c3_8->getOutput (0 ) };
158149 auto cat14 = network->addConcatenation (inputTensors14, 2 );
159150 auto c3_15 = C3 (network, weightMap, *cat14->getOutput (0 ), get_width (1536 , gw), get_width (768 , gw), get_depth (3 , gd), false , 1 , 0.5 , " model.15" );
160-
161151 auto conv16 = convBlock (network, weightMap, *c3_15->getOutput (0 ), get_width (512 , gw), 1 , 1 , 1 , " model.16" );
162152 auto upsample17 = network->addResize (*conv16->getOutput (0 ));
163153 assert (upsample17);
@@ -166,7 +156,6 @@ ICudaEngine* build_engine_p6(unsigned int maxBatchSize, IBuilder* builder, IBuil
166156 ITensor* inputTensors18[] = { upsample17->getOutput (0 ), c3_6->getOutput (0 ) };
167157 auto cat18 = network->addConcatenation (inputTensors18, 2 );
168158 auto c3_19 = C3 (network, weightMap, *cat18->getOutput (0 ), get_width (1024 , gw), get_width (512 , gw), get_depth (3 , gd), false , 1 , 0.5 , " model.19" );
169-
170159 auto conv20 = convBlock (network, weightMap, *c3_19->getOutput (0 ), get_width (256 , gw), 1 , 1 , 1 , " model.20" );
171160 auto upsample21 = network->addResize (*conv20->getOutput (0 ));
172161 assert (upsample21);
@@ -175,22 +164,18 @@ ICudaEngine* build_engine_p6(unsigned int maxBatchSize, IBuilder* builder, IBuil
175164 ITensor* inputTensors21[] = { upsample21->getOutput (0 ), c3_4->getOutput (0 ) };
176165 auto cat22 = network->addConcatenation (inputTensors21, 2 );
177166 auto c3_23 = C3 (network, weightMap, *cat22->getOutput (0 ), get_width (512 , gw), get_width (256 , gw), get_depth (3 , gd), false , 1 , 0.5 , " model.23" );
178-
179167 auto conv24 = convBlock (network, weightMap, *c3_23->getOutput (0 ), get_width (256 , gw), 3 , 2 , 1 , " model.24" );
180168 ITensor* inputTensors25[] = { conv24->getOutput (0 ), conv20->getOutput (0 ) };
181169 auto cat25 = network->addConcatenation (inputTensors25, 2 );
182170 auto c3_26 = C3 (network, weightMap, *cat25->getOutput (0 ), get_width (1024 , gw), get_width (512 , gw), get_depth (3 , gd), false , 1 , 0.5 , " model.26" );
183-
184171 auto conv27 = convBlock (network, weightMap, *c3_26->getOutput (0 ), get_width (512 , gw), 3 , 2 , 1 , " model.27" );
185172 ITensor* inputTensors28[] = { conv27->getOutput (0 ), conv16->getOutput (0 ) };
186173 auto cat28 = network->addConcatenation (inputTensors28, 2 );
187174 auto c3_29 = C3 (network, weightMap, *cat28->getOutput (0 ), get_width (1536 , gw), get_width (768 , gw), get_depth (3 , gd), false , 1 , 0.5 , " model.29" );
188-
189175 auto conv30 = convBlock (network, weightMap, *c3_29->getOutput (0 ), get_width (768 , gw), 3 , 2 , 1 , " model.30" );
190176 ITensor* inputTensors31[] = { conv30->getOutput (0 ), conv12->getOutput (0 ) };
191177 auto cat31 = network->addConcatenation (inputTensors31, 2 );
192178 auto c3_32 = C3 (network, weightMap, *cat31->getOutput (0 ), get_width (2048 , gw), get_width (1024 , gw), get_depth (3 , gd), false , 1 , 0.5 , " model.32" );
193-
194179 /* ------ detect ------ */
195180 IConvolutionLayer* det0 = network->addConvolutionNd (*c3_23->getOutput (0 ), 3 * (Yolo::CLASS_NUM + 5 ), DimsHW{ 1 , 1 }, weightMap[" model.33.m.0.weight" ], weightMap[" model.33.m.0.bias" ]);
196181 IConvolutionLayer* det1 = network->addConvolutionNd (*c3_26->getOutput (0 ), 3 * (Yolo::CLASS_NUM + 5 ), DimsHW{ 1 , 1 }, weightMap[" model.33.m.1.weight" ], weightMap[" model.33.m.1.bias" ]);
@@ -200,7 +185,6 @@ ICudaEngine* build_engine_p6(unsigned int maxBatchSize, IBuilder* builder, IBuil
200185 auto yolo = addYoLoLayer (network, weightMap, " model.33" , std::vector<IConvolutionLayer*>{det0, det1, det2, det3});
201186 yolo->getOutput (0 )->setName (OUTPUT_BLOB_NAME);
202187 network->markOutput (*yolo->getOutput (0 ));
203-
204188 // Build engine
205189 builder->setMaxBatchSize (maxBatchSize);
206190 config->setMaxWorkspaceSize (16 * (1 << 20 )); // 16MB
@@ -217,10 +201,8 @@ ICudaEngine* build_engine_p6(unsigned int maxBatchSize, IBuilder* builder, IBuil
217201 std::cout << " Building engine, please wait for a while..." << std::endl;
218202 ICudaEngine* engine = builder->buildEngineWithConfig (*network, *config);
219203 std::cout << " Build engine successfully!" << std::endl;
220-
221204 // Don't need the network any more
222205 network->destroy ();
223-
224206 // Release host memory
225207 for (auto & mem : weightMap)
226208 {
0 commit comments