Skip to content

Commit e94e4ed

Browse files
committed
unet support trt8
1 parent 1512aa7 commit e94e4ed

File tree

4 files changed

+55
-49
lines changed

4 files changed

+55
-49
lines changed

unet/CMakeLists.txt

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,27 +8,23 @@ option(CUDA_USE_STATIC_CUDA_RUNTIME OFF)
88
set(CMAKE_CXX_STANDARD 11)
99
set(CMAKE_BUILD_TYPE Debug)
1010

11-
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-std=c++11;-g;-G;-gencode;arch=compute_30;code=sm_30)
12-
1311
# cuda directory
14-
include_directories(${PROJECT_SOURCE_DIR}/include)
15-
include_directories(/usr/local/cuda-10.2/targets/x86_64-linux/include)
16-
link_directories(/usr/local/cuda-10.2/targets/x86_64-linux/lib)
12+
include_directories(/usr/local/cuda/include/)
13+
link_directories(/usr/local/cuda/lib64/)
1714

1815
# tensorrt
19-
include_directories(/home/sycv/workplace/pengyuzhou/TensorRT-7.0.0.11/targets/x86_64-linux-gnu/include)
20-
link_directories(/home/sycv/workplace/pengyuzhou/TensorRT-7.0.0.11/targets/x86_64-linux-gnu/lib)
16+
include_directories(/workspace/TensorRT-8.4.1.5/include/)
17+
link_directories(/workspace/TensorRT-8.4.1.5/lib/)
2118

22-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED")
19+
# opencv library
20+
find_package(OpenCV)
21+
include_directories(${OpenCV_INCLUDE_DIRS})
2322

2423
# link library and add exec file
2524
add_executable(unet ${PROJECT_SOURCE_DIR}/unet.cpp)
2625
target_link_libraries(unet nvinfer)
2726
target_link_libraries(unet cudart)
27+
target_link_libraries(unet ${OpenCV_LIBS})
2828

2929
add_definitions(-O2 -pthread)
3030

31-
# opencv library
32-
find_package(OpenCV)
33-
include_directories(${OpenCV_INCLUDE_DIRS})
34-
target_link_libraries(unet ${OpenCV_LIBS})

unet/logging.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <ostream>
2626
#include <sstream>
2727
#include <string>
28+
#include "macros.h"
2829

2930
using Severity = nvinfer1::ILogger::Severity;
3031

@@ -236,7 +237,7 @@ class Logger : public nvinfer1::ILogger
236237
//! Note samples should not be calling this function directly; it will eventually go away once we eliminate the
237238
//! inheritance from nvinfer1::ILogger
238239
//!
239-
void log(Severity severity, const char* msg) override
240+
void log(Severity severity, const char* msg) TRT_NOEXCEPT override
240241
{
241242
LogStreamConsumer(mReportableSeverity, severity) << "[TRT] " << std::string(msg) << std::endl;
242243
}
@@ -500,4 +501,4 @@ inline LogStreamConsumer LOG_FATAL(const Logger& logger)
500501

501502
} // anonymous namespace
502503

503-
#endif // TENSORRT_LOGGING_H
504+
#endif // TENSORRT_LOGGING_H

unet/macros.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#ifndef __MACROS_H
2+
#define __MACROS_H
3+
4+
#ifdef API_EXPORTS
5+
#if defined(_MSC_VER)
6+
#define API __declspec(dllexport)
7+
#else
8+
#define API __attribute__((visibility("default")))
9+
#endif
10+
#else
11+
12+
#if defined(_MSC_VER)
13+
#define API __declspec(dllimport)
14+
#else
15+
#define API
16+
#endif
17+
#endif // API_EXPORTS
18+
19+
#if NV_TENSORRT_MAJOR >= 8
20+
#define TRT_NOEXCEPT noexcept
21+
#define TRT_CONST_ENQUEUE const
22+
#else
23+
#define TRT_NOEXCEPT
24+
#define TRT_CONST_ENQUEUE
25+
#endif
26+
27+
#endif // __MACROS_H

unet/unet.cpp

Lines changed: 17 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -3,28 +3,22 @@
33
#include "cuda_runtime_api.h"
44
#include "logging.h"
55
#include "common.hpp"
6+
67
#define DEVICE 0
7-
#define NET s // s m l x
8-
#define NETSTRUCT(str) createEngine_##str
9-
#define CREATENET(net) NETSTRUCT(net)
10-
#define STR1(x) #x
11-
#define STR2(x) STR1(x)
128
// #define USE_FP16 // uncomment this to use FP16
139
#define CONF_THRESH 0.5
1410
#define BATCH_SIZE 1
11+
12+
using namespace nvinfer1;
13+
1514
// stuff we know about the network and the input/output blobs
1615
static const int INPUT_H = 816;
1716
static const int INPUT_W = 672;
1817
static const int OUTPUT_SIZE = 672*816;
19-
2018
const char* INPUT_BLOB_NAME = "data";
2119
const char* OUTPUT_BLOB_NAME = "prob";
22-
23-
using namespace nvinfer1;
24-
2520
static Logger gLogger;
2621

27-
2822
cv::Mat preprocess_img(cv::Mat& img) {
2923
int w, h, x, y;
3024
float r_w = INPUT_W / (img.cols*1.0);
@@ -47,8 +41,6 @@ cv::Mat preprocess_img(cv::Mat& img) {
4741
return out;
4842
}
4943

50-
51-
5244
ILayer* doubleConv(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int outch, int ksize, std::string lname, int midch){
5345
// Weights emptywts{DataType::kFLOAT, nullptr, 0};
5446
// int p = ksize / 2;
@@ -97,28 +89,26 @@ ILayer* up(INetworkDefinition *network, std::map<std::string, Weights>& weightMa
9789
// IPoolingLayer* pool1 = network->addPooling(dcov1, PoolingType::kMAX, DimsHW{2, 2});
9890
// pool1->setStrideNd(DimsHW{2, 2});
9991
// dcov1->add_padding
100-
ILayer* pad1 = network->addPaddingNd(*deconv1->getOutput(0),DimsHW{diffx / 2, diffy / 2},DimsHW{diffx - (diffx / 2), diffy - (diffy / 2)});
92+
ILayer* pad1 = network->addPaddingNd(*deconv1->getOutput(0), DimsHW{diffx / 2, diffy / 2}, DimsHW{diffx - (diffx / 2), diffy - (diffy / 2)});
10193
// dcov1->setPaddingNd(DimsHW{diffx / 2, diffx - diffx / 2},DimsHW{diffy / 2, diffy - diffy / 2});
102-
ITensor* inputTensors[] = {&input2,pad1->getOutput(0)};
94+
ITensor* inputTensors[] = {&input2, pad1->getOutput(0)};
10395
auto cat = network->addConcatenation(inputTensors, 2);
10496
assert(cat);
105-
if (midch==64){
97+
if (midch == 64) {
10698
ILayer* dcov1 = doubleConv(network,weightMap,*cat->getOutput(0),outch,3,lname+".conv",outch);
10799
assert(dcov1);
108100
return dcov1;
109-
}else{
101+
} else {
110102
int midch1 = outch/2;
111103
ILayer* dcov1 = doubleConv(network,weightMap,*cat->getOutput(0),midch1,3,lname+".conv",outch);
112104
assert(dcov1);
113105
return dcov1;
114106
}
115-
116107
// assert(dcov1);
117-
118108
// return dcov1;
119109
}
120110

121-
ILayer* outConv(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int outch, std::string lname){
111+
ILayer* outConv(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int outch, std::string lname) {
122112
// Weights emptywts{DataType::kFLOAT, nullptr, 0};
123113

124114
IConvolutionLayer* conv1 = network->addConvolutionNd(input, 1, DimsHW{1, 1}, weightMap[lname + ".conv.weight"], weightMap[lname + ".conv.bias"]);
@@ -129,16 +119,14 @@ ILayer* outConv(INetworkDefinition *network, std::map<std::string, Weights>& wei
129119
return conv1;
130120
}
131121

132-
133-
134122
ICudaEngine* createEngine_l(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt) {
135123
INetworkDefinition* network = builder->createNetworkV2(0U);
136124

137125
// Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
138126
ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{ 3, INPUT_H, INPUT_W });
139127
assert(data);
140128

141-
std::map<std::string, Weights> weightMap = loadWeights("/home/sycv/workplace/pengyuzhou/tensorrtx/unet/unet_816_672.wts");
129+
std::map<std::string, Weights> weightMap = loadWeights("../unet.wts");
142130
Weights emptywts{DataType::kFLOAT, nullptr, 0};
143131

144132
// build network
@@ -170,22 +158,19 @@ ICudaEngine* createEngine_l(unsigned int maxBatchSize, IBuilder* builder, IBuild
170158
network->destroy();
171159

172160
// Release host memory
173-
for (auto& mem : weightMap)
174-
{
161+
for (auto& mem : weightMap) {
175162
free((void*)(mem.second.values));
176163
}
177164

178165
return engine;
179166
}
180167

181-
182168
void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream) {
183169
// Create builder
184170
IBuilder* builder = createInferBuilder(gLogger);
185171
IBuilderConfig* config = builder->createBuilderConfig();
186172

187173
// Create model to populate the network, then set the outputs and create an engine
188-
// ICudaEngine* engine = (CREATENET(NET))(maxBatchSize, builder, config, DataType::kFLOAT);
189174
ICudaEngine* engine = createEngine_l(maxBatchSize, builder, config, DataType::kFLOAT);
190175
assert(engine != nullptr);
191176

@@ -222,7 +207,7 @@ void doInference(IExecutionContext& context, float* input, float* output, int ba
222207
CHECK(cudaMemcpyAsync(buffers[inputIndex], input, batchSize * 3 * INPUT_H * INPUT_W * sizeof(float), cudaMemcpyHostToDevice, stream));
223208
context.enqueue(batchSize, buffers, stream, nullptr);
224209
CHECK(cudaMemcpyAsync(output, buffers[outputIndex], batchSize * OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream));
225-
//流同步:通过cudaStreamSynchronize()来协调。
210+
226211
cudaStreamSynchronize(stream);
227212

228213
// Release stream and buffers
@@ -231,20 +216,19 @@ void doInference(IExecutionContext& context, float* input, float* output, int ba
231216
CHECK(cudaFree(buffers[outputIndex]));
232217
}
233218

234-
struct Detection{
219+
struct Detection {
235220
float mask[INPUT_W*INPUT_H*1];
236-
};
221+
};
237222

238-
float sigmoid(float x)
239-
{
223+
float sigmoid(float x) {
240224
return (1 / (1 + exp(-x)));
241225
}
242226

243227
void process_cls_result(Detection &res, float *output) {
244-
for(int i=0;i<INPUT_W*INPUT_H*1;i++){
228+
for (int i = 0; i < INPUT_W * INPUT_H * 1; i++) {
245229
res.mask[i] = sigmoid(*(output+i));
246-
}
247230
}
231+
}
248232

249233
int main(int argc, char** argv) {
250234
cudaSetDevice(DEVICE);
@@ -329,8 +313,6 @@ int main(int argc, char** argv) {
329313
auto end = std::chrono::system_clock::now();
330314
std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
331315

332-
333-
334316
std::vector<Detection> batch_res(fcount);
335317
for (int b = 0; b < fcount; b++) {
336318
auto& res = batch_res[b];

0 commit comments

Comments
 (0)