66#include  " common.hpp" 
77#include  " utils.h" 
88#include  " calibrator.h" 
9- 
10- #define  USE_FP16   //  set USE_INT8 or USE_FP16 or USE_FP32
9+ # include   < typeinfo > 
10+ #define  USE_FP32   //  set USE_INT8 or USE_FP16 or USE_FP32
1111#define  DEVICE  0   //  GPU id
1212#define  NMS_THRESH  0.4 
1313#define  CONF_THRESH  0.5 
@@ -35,30 +35,29 @@ static int get_depth(int x, float gd) {
3535    return  std::max<int >(r, 1 );
3636}
3737
38+ 
39+ 
3840ICudaEngine* build_engine (unsigned  int  maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt, float & gd, float & gw, std::string& wts_name) {
3941    INetworkDefinition* network = builder->createNetworkV2 (0U );
4042
4143    //  Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
4244    ITensor* data = network->addInput (INPUT_BLOB_NAME, dt, Dims3{ 3 , INPUT_H, INPUT_W });
4345    assert (data);
44- 
4546    std::map<std::string, Weights> weightMap = loadWeights (wts_name);
46- 
4747    /*  ------ yolov5 backbone------ */ 
48-     auto  focus0 = focus (network, weightMap, *data, 3 , get_width (64 , gw), 3 , " model.0"  );
49-     auto  conv1 = convBlock (network, weightMap, *focus0->getOutput (0 ), get_width (128 , gw), 3 , 2 , 1 , " model.1"  );
48+     auto  conv0 = convBlock (network, weightMap, *data,  get_width (64 , gw), 6 , 2 , 1 ,  " model.0"  );
49+     assert (conv0);
50+     auto  conv1 = convBlock (network, weightMap, *conv0->getOutput (0 ), get_width (128 , gw), 3 , 2 , 1 , " model.1"  );
5051    auto  bottleneck_CSP2 = C3 (network, weightMap, *conv1->getOutput (0 ), get_width (128 , gw), get_width (128 , gw), get_depth (3 , gd), true , 1 , 0.5 , " model.2"  );
5152    auto  conv3 = convBlock (network, weightMap, *bottleneck_CSP2->getOutput (0 ), get_width (256 , gw), 3 , 2 , 1 , " model.3"  );
52-     auto  bottleneck_csp4 = C3 (network, weightMap, *conv3->getOutput (0 ), get_width (256 , gw), get_width (256 , gw), get_depth (9 , gd), true , 1 , 0.5 , " model.4"  );
53+     auto  bottleneck_csp4 = C3 (network, weightMap, *conv3->getOutput (0 ), get_width (256 , gw), get_width (256 , gw), get_depth (6 , gd), true , 1 , 0.5 , " model.4"  );
5354    auto  conv5 = convBlock (network, weightMap, *bottleneck_csp4->getOutput (0 ), get_width (512 , gw), 3 , 2 , 1 , " model.5"  );
5455    auto  bottleneck_csp6 = C3 (network, weightMap, *conv5->getOutput (0 ), get_width (512 , gw), get_width (512 , gw), get_depth (9 , gd), true , 1 , 0.5 , " model.6"  );
5556    auto  conv7 = convBlock (network, weightMap, *bottleneck_csp6->getOutput (0 ), get_width (1024 , gw), 3 , 2 , 1 , " model.7"  );
56-     auto  spp8  = SPP (network, weightMap, *conv7->getOutput (0 ), get_width (1024 , gw), get_width (1024 , gw), 5 ,  9 ,  13 , " model.8"  );
57- 
57+     auto  bottleneck_csp8  = C3 (network, weightMap, *conv7->getOutput (0 ), get_width (1024 , gw), get_width (1024 , gw), get_depth ( 3 , gd),  false ,  1 ,  0.5 , " model.8"  );
58+      auto  spp9 =  SPPF (network, weightMap, *bottleneck_csp8-> getOutput ( 0 ),  get_width ( 1024 , gw),  get_width ( 1024 , gw),  5 ,  " model.9 " ); 
5859    /*  ------ yolov5 head ------ */ 
59-     auto  bottleneck_csp9 = C3 (network, weightMap, *spp8->getOutput (0 ), get_width (1024 , gw), get_width (1024 , gw), get_depth (3 , gd), false , 1 , 0.5 , " model.9"  );
60-     auto  conv10 = convBlock (network, weightMap, *bottleneck_csp9->getOutput (0 ), get_width (512 , gw), 1 , 1 , 1 , " model.10"  );
61- 
60+     auto  conv10 = convBlock (network, weightMap, *spp9->getOutput (0 ), get_width (512 , gw), 1 , 1 , 1 , " model.10"  );
6261    auto  upsample11 = network->addResize (*conv10->getOutput (0 ));
6362    assert (upsample11);
6463    upsample11->setResizeMode (ResizeMode::kNEAREST );
@@ -76,9 +75,7 @@ ICudaEngine* build_engine(unsigned int maxBatchSize, IBuilder* builder, IBuilder
7675
7776    ITensor* inputTensors16[] = { upsample15->getOutput (0 ), bottleneck_csp4->getOutput (0 ) };
7877    auto  cat16 = network->addConcatenation (inputTensors16, 2 );
79- 
8078    auto  bottleneck_csp17 = C3 (network, weightMap, *cat16->getOutput (0 ), get_width (512 , gw), get_width (256 , gw), get_depth (3 , gd), false , 1 , 0.5 , " model.17"  );
81- 
8279    /*  ------ detect ------ */ 
8380    IConvolutionLayer* det0 = network->addConvolutionNd (*bottleneck_csp17->getOutput (0 ), 3  * (Yolo::CLASS_NUM + 5 ), DimsHW{ 1 , 1  }, weightMap[" model.24.m.0.weight"  ], weightMap[" model.24.m.0.bias"  ]);
8481    auto  conv18 = convBlock (network, weightMap, *bottleneck_csp17->getOutput (0 ), get_width (256 , gw), 3 , 2 , 1 , " model.18"  );
@@ -91,11 +88,9 @@ ICudaEngine* build_engine(unsigned int maxBatchSize, IBuilder* builder, IBuilder
9188    auto  cat22 = network->addConcatenation (inputTensors22, 2 );
9289    auto  bottleneck_csp23 = C3 (network, weightMap, *cat22->getOutput (0 ), get_width (1024 , gw), get_width (1024 , gw), get_depth (3 , gd), false , 1 , 0.5 , " model.23"  );
9390    IConvolutionLayer* det2 = network->addConvolutionNd (*bottleneck_csp23->getOutput (0 ), 3  * (Yolo::CLASS_NUM + 5 ), DimsHW{ 1 , 1  }, weightMap[" model.24.m.2.weight"  ], weightMap[" model.24.m.2.bias"  ]);
94- 
9591    auto  yolo = addYoLoLayer (network, weightMap, " model.24"  , std::vector<IConvolutionLayer*>{det0, det1, det2});
9692    yolo->getOutput (0 )->setName (OUTPUT_BLOB_NAME);
9793    network->markOutput (*yolo->getOutput (0 ));
98- 
9994    //  Build engine
10095    builder->setMaxBatchSize (maxBatchSize);
10196    config->setMaxWorkspaceSize (16  * (1  << 20 ));  //  16MB
@@ -124,40 +119,35 @@ ICudaEngine* build_engine(unsigned int maxBatchSize, IBuilder* builder, IBuilder
124119
125120    return  engine;
126121}
127- 
122+ // v6.0 
128123ICudaEngine* build_engine_p6 (unsigned  int  maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt, float & gd, float & gw, std::string& wts_name) {
129124    INetworkDefinition* network = builder->createNetworkV2 (0U );
130- 
131125    //  Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
132126    ITensor* data = network->addInput (INPUT_BLOB_NAME, dt, Dims3{ 3 , INPUT_H, INPUT_W });
133127    assert (data);
134- 
135128    std::map<std::string, Weights> weightMap = loadWeights (wts_name);
136- 
137129    /*  ------ yolov5 backbone------ */ 
138-     auto  focus0  = focus (network, weightMap, *data, 3 ,  get_width (64 , gw), 3 ,  " model.0"  );
139-     auto  conv1 = convBlock (network, weightMap, *focus0 ->getOutput (0 ), get_width (128 , gw), 3 , 2 , 1 , " model.1"  );
130+     auto  conv0  = convBlock (network, weightMap, *data,  get_width (64 , gw), 6 ,  2 ,  1 ,   " model.0"  );
131+     auto  conv1 = convBlock (network, weightMap, *conv0 ->getOutput (0 ), get_width (128 , gw), 3 , 2 , 1 , " model.1"  );
140132    auto  c3_2 = C3 (network, weightMap, *conv1->getOutput (0 ), get_width (128 , gw), get_width (128 , gw), get_depth (3 , gd), true , 1 , 0.5 , " model.2"  );
141133    auto  conv3 = convBlock (network, weightMap, *c3_2->getOutput (0 ), get_width (256 , gw), 3 , 2 , 1 , " model.3"  );
142-     auto  c3_4 = C3 (network, weightMap, *conv3->getOutput (0 ), get_width (256 , gw), get_width (256 , gw), get_depth (9 , gd), true , 1 , 0.5 , " model.4"  );
134+     auto  c3_4 = C3 (network, weightMap, *conv3->getOutput (0 ), get_width (256 , gw), get_width (256 , gw), get_depth (6 , gd), true , 1 , 0.5 , " model.4"  );
143135    auto  conv5 = convBlock (network, weightMap, *c3_4->getOutput (0 ), get_width (512 , gw), 3 , 2 , 1 , " model.5"  );
144136    auto  c3_6 = C3 (network, weightMap, *conv5->getOutput (0 ), get_width (512 , gw), get_width (512 , gw), get_depth (9 , gd), true , 1 , 0.5 , " model.6"  );
145137    auto  conv7 = convBlock (network, weightMap, *c3_6->getOutput (0 ), get_width (768 , gw), 3 , 2 , 1 , " model.7"  );
146138    auto  c3_8 = C3 (network, weightMap, *conv7->getOutput (0 ), get_width (768 , gw), get_width (768 , gw), get_depth (3 , gd), true , 1 , 0.5 , " model.8"  );
147139    auto  conv9 = convBlock (network, weightMap, *c3_8->getOutput (0 ), get_width (1024 , gw), 3 , 2 , 1 , " model.9"  );
148-     auto  spp10 = SPP (network, weightMap, *conv9->getOutput (0 ), get_width (1024 , gw), get_width (1024 , gw), 3 , 5 , 7 , " model.10"  );
149-     auto  c3_11 = C3 (network, weightMap, *spp10->getOutput (0 ), get_width (1024 , gw), get_width (1024 , gw), get_depth (3 , gd), false , 1 , 0.5 , " model.11"  );
150- 
140+     auto  c3_10 = C3 (network, weightMap, *conv9->getOutput (0 ), get_width (1024 , gw), get_width (1024 , gw), get_depth (3 , gd), false , 1 , 0.5 , " model.10"  );
141+     auto  sppf11 = SPPF (network, weightMap, *c3_10->getOutput (0 ), get_width (1024 , gw), get_width (1024 , gw), 5 , " model.11"  );
151142    /*  ------ yolov5 head ------ */ 
152-     auto  conv12 = convBlock (network, weightMap, *c3_11 ->getOutput (0 ), get_width (768 , gw), 1 , 1 , 1 , " model.12"  );
143+     auto  conv12 = convBlock (network, weightMap, *sppf11 ->getOutput (0 ), get_width (768 , gw), 1 , 1 , 1 , " model.12"  );
153144    auto  upsample13 = network->addResize (*conv12->getOutput (0 ));
154145    assert (upsample13);
155146    upsample13->setResizeMode (ResizeMode::kNEAREST );
156147    upsample13->setOutputDimensions (c3_8->getOutput (0 )->getDimensions ());
157148    ITensor* inputTensors14[] = { upsample13->getOutput (0 ), c3_8->getOutput (0 ) };
158149    auto  cat14 = network->addConcatenation (inputTensors14, 2 );
159150    auto  c3_15 = C3 (network, weightMap, *cat14->getOutput (0 ), get_width (1536 , gw), get_width (768 , gw), get_depth (3 , gd), false , 1 , 0.5 , " model.15"  );
160- 
161151    auto  conv16 = convBlock (network, weightMap, *c3_15->getOutput (0 ), get_width (512 , gw), 1 , 1 , 1 , " model.16"  );
162152    auto  upsample17 = network->addResize (*conv16->getOutput (0 ));
163153    assert (upsample17);
@@ -166,7 +156,6 @@ ICudaEngine* build_engine_p6(unsigned int maxBatchSize, IBuilder* builder, IBuil
166156    ITensor* inputTensors18[] = { upsample17->getOutput (0 ), c3_6->getOutput (0 ) };
167157    auto  cat18 = network->addConcatenation (inputTensors18, 2 );
168158    auto  c3_19 = C3 (network, weightMap, *cat18->getOutput (0 ), get_width (1024 , gw), get_width (512 , gw), get_depth (3 , gd), false , 1 , 0.5 , " model.19"  );
169- 
170159    auto  conv20 = convBlock (network, weightMap, *c3_19->getOutput (0 ), get_width (256 , gw), 1 , 1 , 1 , " model.20"  );
171160    auto  upsample21 = network->addResize (*conv20->getOutput (0 ));
172161    assert (upsample21);
@@ -175,22 +164,18 @@ ICudaEngine* build_engine_p6(unsigned int maxBatchSize, IBuilder* builder, IBuil
175164    ITensor* inputTensors21[] = { upsample21->getOutput (0 ), c3_4->getOutput (0 ) };
176165    auto  cat22 = network->addConcatenation (inputTensors21, 2 );
177166    auto  c3_23 = C3 (network, weightMap, *cat22->getOutput (0 ), get_width (512 , gw), get_width (256 , gw), get_depth (3 , gd), false , 1 , 0.5 , " model.23"  );
178- 
179167    auto  conv24 = convBlock (network, weightMap, *c3_23->getOutput (0 ), get_width (256 , gw), 3 , 2 , 1 , " model.24"  );
180168    ITensor* inputTensors25[] = { conv24->getOutput (0 ), conv20->getOutput (0 ) };
181169    auto  cat25 = network->addConcatenation (inputTensors25, 2 );
182170    auto  c3_26 = C3 (network, weightMap, *cat25->getOutput (0 ), get_width (1024 , gw), get_width (512 , gw), get_depth (3 , gd), false , 1 , 0.5 , " model.26"  );
183- 
184171    auto  conv27 = convBlock (network, weightMap, *c3_26->getOutput (0 ), get_width (512 , gw), 3 , 2 , 1 , " model.27"  );
185172    ITensor* inputTensors28[] = { conv27->getOutput (0 ), conv16->getOutput (0 ) };
186173    auto  cat28 = network->addConcatenation (inputTensors28, 2 );
187174    auto  c3_29 = C3 (network, weightMap, *cat28->getOutput (0 ), get_width (1536 , gw), get_width (768 , gw), get_depth (3 , gd), false , 1 , 0.5 , " model.29"  );
188- 
189175    auto  conv30 = convBlock (network, weightMap, *c3_29->getOutput (0 ), get_width (768 , gw), 3 , 2 , 1 , " model.30"  );
190176    ITensor* inputTensors31[] = { conv30->getOutput (0 ), conv12->getOutput (0 ) };
191177    auto  cat31 = network->addConcatenation (inputTensors31, 2 );
192178    auto  c3_32 = C3 (network, weightMap, *cat31->getOutput (0 ), get_width (2048 , gw), get_width (1024 , gw), get_depth (3 , gd), false , 1 , 0.5 , " model.32"  );
193- 
194179    /*  ------ detect ------ */ 
195180    IConvolutionLayer* det0 = network->addConvolutionNd (*c3_23->getOutput (0 ), 3  * (Yolo::CLASS_NUM + 5 ), DimsHW{ 1 , 1  }, weightMap[" model.33.m.0.weight"  ], weightMap[" model.33.m.0.bias"  ]);
196181    IConvolutionLayer* det1 = network->addConvolutionNd (*c3_26->getOutput (0 ), 3  * (Yolo::CLASS_NUM + 5 ), DimsHW{ 1 , 1  }, weightMap[" model.33.m.1.weight"  ], weightMap[" model.33.m.1.bias"  ]);
@@ -200,7 +185,6 @@ ICudaEngine* build_engine_p6(unsigned int maxBatchSize, IBuilder* builder, IBuil
200185    auto  yolo = addYoLoLayer (network, weightMap, " model.33"  , std::vector<IConvolutionLayer*>{det0, det1, det2, det3});
201186    yolo->getOutput (0 )->setName (OUTPUT_BLOB_NAME);
202187    network->markOutput (*yolo->getOutput (0 ));
203- 
204188    //  Build engine
205189    builder->setMaxBatchSize (maxBatchSize);
206190    config->setMaxWorkspaceSize (16  * (1  << 20 ));  //  16MB
@@ -217,10 +201,8 @@ ICudaEngine* build_engine_p6(unsigned int maxBatchSize, IBuilder* builder, IBuil
217201    std::cout << " Building engine, please wait for a while..."   << std::endl;
218202    ICudaEngine* engine = builder->buildEngineWithConfig (*network, *config);
219203    std::cout << " Build engine successfully!"   << std::endl;
220- 
221204    //  Don't need the network any more
222205    network->destroy ();
223- 
224206    //  Release host memory
225207    for  (auto & mem : weightMap)
226208    {
0 commit comments