#include <cmath>

#include "cuda_runtime_api.h"
#include "logging.h"
#include "common.hpp"
6+
67#define DEVICE 0
7- #define NET s // s m l x
8- #define NETSTRUCT (str ) createEngine_##str
9- #define CREATENET (net ) NETSTRUCT(net)
10- #define STR1 (x ) #x
11- #define STR2 (x ) STR1(x)
128// #define USE_FP16 // comment out this if want to use FP16
139#define CONF_THRESH 0.5
1410#define BATCH_SIZE 1
11+
12+ using namespace nvinfer1 ;
13+
1514// stuff we know about the network and the input/output blobs
1615static const int INPUT_H = 816 ;
1716static const int INPUT_W = 672 ;
1817static const int OUTPUT_SIZE = 672 *816 ;
19-
2018const char * INPUT_BLOB_NAME = " data" ;
2119const char * OUTPUT_BLOB_NAME = " prob" ;
22-
23- using namespace nvinfer1 ;
24-
2520static Logger gLogger ;
2621
27-
2822cv::Mat preprocess_img (cv::Mat& img) {
2923 int w, h, x, y;
3024 float r_w = INPUT_W / (img.cols *1.0 );
@@ -47,8 +41,6 @@ cv::Mat preprocess_img(cv::Mat& img) {
4741 return out;
4842}
4943
50-
51-
5244ILayer* doubleConv (INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int outch, int ksize, std::string lname, int midch){
5345 // Weights emptywts{DataType::kFLOAT, nullptr, 0};
5446 // int p = ksize / 2;
@@ -97,28 +89,26 @@ ILayer* up(INetworkDefinition *network, std::map<std::string, Weights>& weightMa
9789 // IPoolingLayer* pool1 = network->addPooling(dcov1, PoolingType::kMAX, DimsHW{2, 2});
9890 // pool1->setStrideNd(DimsHW{2, 2});
9991 // dcov1->add_pading
100- ILayer* pad1 = network->addPaddingNd (*deconv1->getOutput (0 ),DimsHW{diffx / 2 , diffy / 2 },DimsHW{diffx - (diffx / 2 ), diffy - (diffy / 2 )});
92+ ILayer* pad1 = network->addPaddingNd (*deconv1->getOutput (0 ), DimsHW{diffx / 2 , diffy / 2 }, DimsHW{diffx - (diffx / 2 ), diffy - (diffy / 2 )});
10193 // dcov1->setPaddingNd(DimsHW{diffx / 2, diffx - diffx / 2},DimsHW{diffy / 2, diffy - diffy / 2});
102- ITensor* inputTensors[] = {&input2,pad1->getOutput (0 )};
94+ ITensor* inputTensors[] = {&input2, pad1->getOutput (0 )};
10395 auto cat = network->addConcatenation (inputTensors, 2 );
10496 assert (cat);
105- if (midch== 64 ){
97+ if (midch == 64 ) {
10698 ILayer* dcov1 = doubleConv (network,weightMap,*cat->getOutput (0 ),outch,3 ,lname+" .conv" ,outch);
10799 assert (dcov1);
108100 return dcov1;
109- }else {
101+ } else {
110102 int midch1 = outch/2 ;
111103 ILayer* dcov1 = doubleConv (network,weightMap,*cat->getOutput (0 ),midch1,3 ,lname+" .conv" ,outch);
112104 assert (dcov1);
113105 return dcov1;
114106 }
115-
116107 // assert(dcov1);
117-
118108 // return dcov1;
119109}
120110
121- ILayer* outConv (INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int outch, std::string lname){
111+ ILayer* outConv (INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int outch, std::string lname) {
122112 // Weights emptywts{DataType::kFLOAT, nullptr, 0};
123113
124114 IConvolutionLayer* conv1 = network->addConvolutionNd (input, 1 , DimsHW{1 , 1 }, weightMap[lname + " .conv.weight" ], weightMap[lname + " .conv.bias" ]);
@@ -129,16 +119,14 @@ ILayer* outConv(INetworkDefinition *network, std::map<std::string, Weights>& wei
129119 return conv1;
130120}
131121
132-
133-
134122ICudaEngine* createEngine_l (unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt) {
135123 INetworkDefinition* network = builder->createNetworkV2 (0U );
136124
137125 // Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
138126 ITensor* data = network->addInput (INPUT_BLOB_NAME, dt, Dims3{ 3 , INPUT_H, INPUT_W });
139127 assert (data);
140128
141- std::map<std::string, Weights> weightMap = loadWeights (" /home/sycv/workplace/pengyuzhou/tensorrtx/ unet/unet_816_672 .wts" );
129+ std::map<std::string, Weights> weightMap = loadWeights (" ../ unet.wts" );
142130 Weights emptywts{DataType::kFLOAT , nullptr , 0 };
143131
144132 // build network
@@ -170,22 +158,19 @@ ICudaEngine* createEngine_l(unsigned int maxBatchSize, IBuilder* builder, IBuild
170158 network->destroy ();
171159
172160 // Release host memory
173- for (auto & mem : weightMap)
174- {
161+ for (auto & mem : weightMap) {
175162 free ((void *)(mem.second .values ));
176163 }
177164
178165 return engine;
179166}
180167
181-
182168void APIToModel (unsigned int maxBatchSize, IHostMemory** modelStream) {
183169 // Create builder
184170 IBuilder* builder = createInferBuilder (gLogger );
185171 IBuilderConfig* config = builder->createBuilderConfig ();
186172
187173 // Create model to populate the network, then set the outputs and create an engine
188- // ICudaEngine* engine = (CREATENET(NET))(maxBatchSize, builder, config, DataType::kFLOAT);
189174 ICudaEngine* engine = createEngine_l (maxBatchSize, builder, config, DataType::kFLOAT );
190175 assert (engine != nullptr );
191176
@@ -222,7 +207,7 @@ void doInference(IExecutionContext& context, float* input, float* output, int ba
222207 CHECK (cudaMemcpyAsync (buffers[inputIndex], input, batchSize * 3 * INPUT_H * INPUT_W * sizeof (float ), cudaMemcpyHostToDevice, stream));
223208 context.enqueue (batchSize, buffers, stream, nullptr );
224209 CHECK (cudaMemcpyAsync (output, buffers[outputIndex], batchSize * OUTPUT_SIZE * sizeof (float ), cudaMemcpyDeviceToHost, stream));
// Stream synchronization: coordinate host and device via cudaStreamSynchronize().
210+
226211 cudaStreamSynchronize (stream);
227212
228213 // Release stream and buffers
@@ -231,20 +216,19 @@ void doInference(IExecutionContext& context, float* input, float* output, int ba
231216 CHECK (cudaFree (buffers[outputIndex]));
232217}
233218
234- struct Detection {
219+ struct Detection {
235220 float mask[INPUT_W*INPUT_H*1 ];
236- };
221+ };
237222
// Logistic (sigmoid) function: maps any real x into the open interval (0, 1).
// Used to turn the network's raw mask logits into per-pixel probabilities.
float sigmoid(float x) {
    // Use expf and float literals: the original `exp(-x)` with integer/double
    // literals silently promoted the whole expression to double precision.
    return 1.0f / (1.0f + expf(-x));
}
242226
243227void process_cls_result (Detection &res, float *output) {
244- for (int i= 0 ;i< INPUT_W* INPUT_H* 1 ; i++){
228+ for (int i = 0 ; i < INPUT_W * INPUT_H * 1 ; i++) {
245229 res.mask [i] = sigmoid (*(output+i));
246- }
247230 }
231+ }
248232
249233int main (int argc, char ** argv) {
250234 cudaSetDevice (DEVICE);
@@ -329,8 +313,6 @@ int main(int argc, char** argv) {
329313 auto end = std::chrono::system_clock::now ();
330314 std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count () << " ms" << std::endl;
331315
332-
333-
334316 std::vector<Detection> batch_res (fcount);
335317 for (int b = 0 ; b < fcount; b++) {
336318 auto & res = batch_res[b];
0 commit comments