Skip to content

Commit 1522e28

Browse files
author
hwh-hit
committed
add Ultra-Fast-Lane-Detection implementation in TensorRT6
1 parent 5adc655 commit 1522e28

File tree

7 files changed

+1187
-0
lines changed

7 files changed

+1187
-0
lines changed

lane_det/CMakeLists.txt

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Build script for the lane_det TensorRT sample (single executable built from lane_det.cpp).
cmake_minimum_required(VERSION 2.6)

project(lane_det)

add_definitions(-std=c++11)

# option() takes (<variable> "<help text>" [initial value]); the help text was
# missing, which made "OFF" the help string instead of the default value.
option(CUDA_USE_STATIC_CUDA_RUNTIME "Link against the static CUDA runtime" OFF)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_BUILD_TYPE Debug)

find_package(CUDA REQUIRED)

# The variable consumed by FindCUDA is CUDA_NVCC_FLAGS (it was misspelled as
# CUDA_NVCC_PLAGS, so these flags were silently ignored). The flags only affect
# sources compiled through nvcc; the executable below lists lane_det.cpp only.
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-std=c++11;-g;-G;-gencode;arch=compute_30;code=sm_30)

include_directories(${PROJECT_SOURCE_DIR}/include)

find_package(OpenCV)
# Expand the variable: without ${} a literal directory named
# "OpenCV_INCLUDE_DIRS" was added to the include path.
include_directories(${OpenCV_INCLUDE_DIRS})

add_executable(lane_det ${PROJECT_SOURCE_DIR}/lane_det.cpp)
target_link_libraries(lane_det nvinfer)
target_link_libraries(lane_det cudart)
target_link_libraries(lane_det ${OpenCV_LIBS})

add_definitions(-O2 -pthread)
26+

lane_det/README.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Ultra-Fast-Lane-Detection
2+
3+
The Pytorch implementation is [Ultra-Fast-Lane-Detection](https://github.com/cfzd/Ultra-Fast-Lane-Detection).
4+
5+
## How to Run
6+
7+
1. generate lane.wts and lane.onnx from pytorch with tusimple_18.pth
8+
9+
git clone https://github.com/wang-xinyu/tensorrtx.git
10+
git clone https://github.com/cfzd/Ultra-Fast-Lane-Detection.git
11+
// download its weights 'tusimple_18.pth'
12+
// copy tensorrtx/lane_det/gen_wts.py into Ultra-Fast-Lane-Detection/
13+
// ensure the file name is tusimple_18.pth and lane.wts in gen_wts.py
14+
// go to Ultra-Fast-Lane-Detection
15+
python gen_wts.py
16+
// a file 'lane.wts' will be generated.
17+
// then
18+
python pth2onnx.py
19+
//a file 'lane.onnx' will be generated.
20+
21+
2. build tensorrtx/lane_det and run
22+
23+
```
24+
mkdir build
25+
cd build
26+
cmake ..
27+
make
28+
sudo ./lane_det -s // serialize model to plan file i.e. 'DBNet.engine'
29+
sudo ./lane_det -d ../data // deserialize plan file and run inference, the images in data will be processed.
30+
```
31+
32+
## More Information
33+
1. The preprocessing and postprocessing differ from the rest of tensorrtx: preprocessing uses a different way to convert NHWC to NCHW, and postprocessing just shows the result using OpenCV rather than saving it.
34+
2. If you hit bugs when running inference with a batch size greater than 1, modify the code in preprocess or postprocess; it's not complicated.
35+
3. Some results are stored in resluts folder.

lane_det/common.hpp

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
#ifndef LANE_DET_COMMON_H_
2+
#define LANE_DET_COMMON_H_
3+
4+
#include <iostream>
5+
#include <fstream>
6+
#include <map>
7+
#include <string>
8+
#include <sstream>
9+
#include <vector>
10+
#include <opencv2/opencv.hpp>
11+
#include "dirent.h"
12+
#include "NvInfer.h"
13+
#include <chrono>
14+
15+
// Evaluates `status` exactly once. A non-zero result is printed to stderr as
// "Cuda failure: <value>" and the process is terminated with abort().
#define CHECK(status)                                              \
    do {                                                           \
        auto ret = (status);                                       \
        if (ret != 0) {                                            \
            std::cerr << "Cuda failure: " << ret << std::endl;     \
            abort();                                               \
        }                                                          \
    } while (0)
25+
26+
using namespace nvinfer1;
27+
28+
// TensorRT weight files have a simple space delimited format:
29+
// [type] [size] <data x size in hex>
30+
std::map<std::string, Weights> loadWeights(const std::string file) {
31+
std::cout << "Loading weights: " << file << std::endl;
32+
std::map<std::string, Weights> weightMap;
33+
34+
// Open weights file
35+
std::ifstream input(file);
36+
assert(input.is_open() && "Unable to load weight file.");
37+
38+
// Read number of weight blobs
39+
int32_t count;
40+
input >> count;
41+
assert(count > 0 && "Invalid weight map file.");
42+
43+
while (count--)
44+
{
45+
Weights wt{DataType::kFLOAT, nullptr, 0};
46+
uint32_t size;
47+
48+
// Read name and type of blob
49+
std::string name;
50+
input >> name >> std::dec >> size;
51+
wt.type = DataType::kFLOAT;
52+
53+
// Load blob
54+
uint32_t* val = reinterpret_cast<uint32_t*>(malloc(sizeof(val) * size));
55+
for (uint32_t x = 0, y = size; x < y; ++x)
56+
{
57+
input >> std::hex >> val[x];
58+
}
59+
wt.values = val;
60+
61+
wt.count = size;
62+
weightMap[name] = wt;
63+
}
64+
65+
return weightMap;
66+
}
67+
68+
// Adds an inference-time batch normalisation to `network` as a per-channel
// scale layer computing (input * scale + shift) ^ power, where
//   scale = gamma / sqrt(running_var + eps)
//   shift = beta - running_mean * gamma / sqrt(running_var + eps)
//   power = 1
//
// @param network    Network definition that receives the new layer.
// @param weightMap  Looked up under `lname` + ".weight", ".bias",
//                   ".running_mean" and ".running_var"; the derived
//                   ".scale", ".shift" and ".power" entries are stored back.
// @param input      Tensor fed into the scale layer.
// @param lname      Key prefix of the batch-norm parameters.
// @param eps        Value added to the variance before the square root.
// @return           The created scale layer (asserted to be non-null).
//
// The three coefficient buffers are malloc'ed here and not freed by this function.
IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, std::string lname, float eps) {
    const float* gamma = static_cast<const float*>(weightMap[lname + ".weight"].values);
    const float* beta = static_cast<const float*>(weightMap[lname + ".bias"].values);
    const float* mean = static_cast<const float*>(weightMap[lname + ".running_mean"].values);
    const float* var = static_cast<const float*>(weightMap[lname + ".running_var"].values);
    const int len = weightMap[lname + ".running_var"].count;

    float* scval = reinterpret_cast<float*>(malloc(sizeof(float) * len));
    float* shval = reinterpret_cast<float*>(malloc(sizeof(float) * len));
    float* pval = reinterpret_cast<float*>(malloc(sizeof(float) * len));

    // Fill all three per-channel coefficient arrays in a single pass.
    for (int ch = 0; ch < len; ++ch) {
        scval[ch] = gamma[ch] / sqrt(var[ch] + eps);
        shval[ch] = beta[ch] - mean[ch] * gamma[ch] / sqrt(var[ch] + eps);
        pval[ch] = 1.0;
    }

    Weights scale{DataType::kFLOAT, scval, len};
    Weights shift{DataType::kFLOAT, shval, len};
    Weights power{DataType::kFLOAT, pval, len};

    weightMap[lname + ".scale"] = scale;
    weightMap[lname + ".shift"] = shift;
    weightMap[lname + ".power"] = power;

    IScaleLayer* scale_1 = network->addScale(input, ScaleMode::kCHANNEL, shift, scale, power);
    assert(scale_1);
    return scale_1;
}
100+
101+
// Adds convolution -> (optional batch norm) -> ReLU to `network`.
// Note: despite the name, the activation is ActivationType::kRELU, not a
// leaky ReLU.
//
// @param network    Network definition that receives the new layers.
// @param weightMap  Supplies `lname`.conv<i>.weight and `lname`.conv<i>.bias,
//                   plus the `lname`.batchnorm<i>.* entries when use_bn is set.
// @param input      Tensor fed into the convolution.
// @param outch      Number of output feature maps.
// @param ksize      Square kernel size.
// @param s          Stride applied in both dimensions.
// @param p          Padding applied in both dimensions.
// @param g          Number of convolution groups.
// @param lname      Key prefix used for the weight lookups.
// @param i          Index appended to ".conv" / ".batchnorm" in the keys.
// @param use_bn     When true, a batch-norm scale layer (eps = 1e-5) is
//                   inserted between the convolution and the activation.
// @return           The activation layer (asserted to be non-null).
ILayer* convBnLeaky( INetworkDefinition *network, std::map<std::string, Weights>& weightMap,
                     ITensor& input, int outch, int ksize, int s, int p, int g,
                     std::string lname, int i, bool use_bn = false )
{
    const std::string convKey = lname + ".conv" + std::to_string(i);

    IConvolutionLayer* conv1 = network->addConvolution(input, outch, DimsHW{ ksize, ksize }, weightMap[convKey + ".weight"], weightMap[convKey + ".bias"]);
    assert(conv1);
    conv1->setStride(DimsHW{s, s});
    conv1->setPadding(DimsHW{p, p});
    conv1->setNbGroups(g);

    // The activation consumes either the raw convolution output or, when
    // requested, the batch-normalised output.
    ITensor* activationInput = conv1->getOutput(0);
    if (use_bn)
    {
        IScaleLayer* bn1 = addBatchNorm2d(network, weightMap, *activationInput, lname + ".batchnorm" + std::to_string(i), 1e-5);
        activationInput = bn1->getOutput(0);
    }

    auto relu = network->addActivation(*activationInput, ActivationType::kRELU);
    assert(relu);
    return relu;
}
126+
127+
// Adds a two-convolution residual block to `network`:
//   main path : 3x3 conv (stride `stride`, pad 1) -> BN -> ReLU -> 3x3 conv (pad 1) -> BN
//   shortcut  : `input` itself, or 1x1 conv (stride `stride`) -> BN when inch != outch
//   output    : ReLU(shortcut + main path)
// All convolutions are bias-free and every batch norm uses eps = 1e-5.
//
// @param network    Network definition that receives the new layers.
// @param weightMap  Supplies `lname` + "conv1.weight", "conv2.weight", "bn1.*",
//                   "bn2.*" and, for the projected shortcut,
//                   "downsample.0.weight" / "downsample.1.*".
// @param input      Tensor entering the block.
// @param inch       Channel count of `input`; only compared against `outch`.
// @param outch      Number of output feature maps of every convolution.
// @param stride     Stride of the first 3x3 convolution and of the 1x1 shortcut.
// @param lname      Key prefix; it is concatenated directly, so it must
//                   already carry any separator.
// @return           The final ReLU layer (asserted to be non-null).
IActivationLayer* basicBlock(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int inch, int outch, int stride, std::string lname) {
    const Weights noBias{ DataType::kFLOAT, nullptr, 0 };

    // Main path, first stage.
    IConvolutionLayer* firstConv = network->addConvolution(input, outch, DimsHW{ 3, 3 }, weightMap[lname + "conv1.weight"], noBias);
    assert(firstConv);
    firstConv->setStride(DimsHW{ stride, stride });
    firstConv->setPadding(DimsHW{ 1, 1 });
    IScaleLayer* firstBn = addBatchNorm2d(network, weightMap, *firstConv->getOutput(0), lname + "bn1", 1e-5);
    IActivationLayer* firstRelu = network->addActivation(*firstBn->getOutput(0), ActivationType::kRELU);
    assert(firstRelu);

    // Main path, second stage (default stride).
    IConvolutionLayer* secondConv = network->addConvolution(*firstRelu->getOutput(0), outch, DimsHW{ 3, 3 }, weightMap[lname + "conv2.weight"], noBias);
    assert(secondConv);
    secondConv->setPadding(DimsHW{ 1, 1 });
    IScaleLayer* secondBn = addBatchNorm2d(network, weightMap, *secondConv->getOutput(0), lname + "bn2", 1e-5);

    // Shortcut: identity unless the channel count changes, in which case the
    // input is projected with a strided 1x1 convolution followed by BN.
    ITensor* shortcut = &input;
    if (inch != outch) {
        IConvolutionLayer* projConv = network->addConvolution(input, outch, DimsHW{ 1, 1 }, weightMap[lname + "downsample.0.weight"], noBias);
        assert(projConv);
        projConv->setStride(DimsHW{ stride, stride });
        IScaleLayer* projBn = addBatchNorm2d(network, weightMap, *projConv->getOutput(0), lname + "downsample.1", 1e-5);
        shortcut = projBn->getOutput(0);
    }

    IElementWiseLayer* sum = network->addElementWise(*shortcut, *secondBn->getOutput(0), ElementWiseOperation::kSUM);
    IActivationLayer* outRelu = network->addActivation(*sum->getOutput(0), ActivationType::kRELU);
    assert(outRelu);
    return outRelu;
}
161+
162+
// Appends the name of every entry of directory `p_dir_name`, except "." and
// "..", to `file_names`.
//
// Only the bare entry name is stored; it is not joined with `p_dir_name`.
//
// @param p_dir_name  Path of the directory to list.
// @param file_names  Output vector; entries are appended, existing content is kept.
// @return            0 on success, -1 if the directory could not be opened.
int read_files_in_dir(const char *p_dir_name, std::vector<std::string> &file_names) {
    DIR* dir = opendir(p_dir_name);
    if (dir == nullptr) {
        return -1;
    }

    for (struct dirent* entry = readdir(dir); entry != nullptr; entry = readdir(dir)) {
        const char* entry_name = entry->d_name;
        const bool is_dot_entry = strcmp(entry_name, ".") == 0 ||
                                  strcmp(entry_name, "..") == 0;
        if (is_dot_entry) {
            continue;
        }
        file_names.push_back(std::string(entry_name));
    }

    closedir(dir);
    return 0;
}
182+
183+
#endif
184+

lane_det/gen_wts.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import torch
import struct
from model.model import parsingNet

# Export the tensors of the 'tusimple_18.pth' checkpoint to the text file
# 'lane.wts':
#   line 1        : number of tensors
#   other lines   : <name> <element count> <big-endian float32 values in hex>

# Initialize
# Build the model directly on the CPU: the previous .cuda() call made the
# script fail on machines without CUDA even though everything below runs on
# the CPU. The model instance is not used when writing the file; the values
# come straight from the checkpoint's state dict.
device = 'cpu'
model = parsingNet(pretrained=False, backbone='18', cls_dim=(101, 56, 4), use_aux=False).to(device)
model.eval()

# Load model
state_dict = torch.load('tusimple_18.pth', map_location='cpu')['model']

# 'with' guarantees the file is flushed and closed, also when an error occurs.
with open('lane.wts', 'w') as f:
    f.write('{}\n'.format(len(state_dict.keys())))
    for k, v in state_dict.items():
        vr = v.reshape(-1).cpu().numpy()
        f.write('{} {} '.format(k, len(vr)))
        for vv in vr:
            f.write(' ')
            f.write(struct.pack('>f', float(vv)).hex())
        f.write('\n')

0 commit comments

Comments
 (0)