Skip to content

Commit f4c384d

Browse files
authored
add: Scaled yolov4 (wang-xinyu#524)
* add: mish, yololayer, layers * update: CMake * fix: compile * add: yolov4-csp net def * update: cuda kernel for scaled_yolov4 * increase nms thresh * update: README * add: gen_wts * update: CMake * fix memory leak * Update README.md * Update README.md * Update README.md * Update README.md
1 parent 4ffc56a commit f4c384d

File tree

11 files changed

+2106
-0
lines changed

11 files changed

+2106
-0
lines changed

scaled-yolov4/CMakeLists.txt

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
cmake_minimum_required(VERSION 2.6)

project(yolov4)

add_definitions(-std=c++11)

# option() expects a help string before the default value; without it the
# word "OFF" is taken as the help text instead of the value.
option(CUDA_USE_STATIC_CUDA_RUNTIME "Link against the static CUDA runtime" OFF)
set(CMAKE_CXX_STANDARD 11)
# Default to a Debug build, but keep a build type passed on the command line
# (e.g. -DCMAKE_BUILD_TYPE=Release).
if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Debug)
endif()

find_package(CUDA REQUIRED)

include_directories(${PROJECT_SOURCE_DIR}/include)
# include and link dirs of cuda and tensorrt, you need adapt them if yours are different
# cuda
include_directories(/usr/local/cuda/include)
link_directories(/usr/local/cuda/lib64)
# tensorrt
include_directories(/usr/include/x86_64-linux-gnu/)
link_directories(/usr/lib/x86_64-linux-gnu/)

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED")

# Plugin library holding the custom YOLO and Mish layers
cuda_add_library(myplugins SHARED ${PROJECT_SOURCE_DIR}/yololayer.cu ${PROJECT_SOURCE_DIR}/mish.cu)
target_link_libraries(myplugins nvinfer cudart)

# OpenCV is included unconditionally by the sources, so fail at configure time if it is missing
find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})

add_executable(yolov4csp ${PROJECT_SOURCE_DIR}/yolov4_csp.cpp)
target_link_libraries(yolov4csp nvinfer)
target_link_libraries(yolov4csp cudart)
target_link_libraries(yolov4csp myplugins)
target_link_libraries(yolov4csp ${OpenCV_LIBS})

add_definitions(-O2 -pthread)
37+

scaled-yolov4/README.md

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# scaled-yolov4
2+
3+
The PyTorch implementation is from [WongKinYiu/ScaledYOLOv4 yolov4-csp branch](https://github.com/WongKinYiu/ScaledYOLOv4/tree/yolov4-csp). It can load yolov4-csp.cfg and yolov4-csp.weights (from AlexeyAB/darknet).
4+
5+
Note: The yolov4-csp.cfg files for darknet and PyTorch differ slightly. Use the one provided in the repo linked above.
6+
7+
## Config
8+
9+
- Input shape `INPUT_H`, `INPUT_W` defined in yololayer.h
10+
- Number of classes `CLASS_NUM` defined in yololayer.h
11+
- FP16/FP32 can be selected by the macro `USE_FP16` in yolov4_csp.cpp
12+
- GPU id can be selected by the macro `DEVICE` in yolov4_csp.cpp
13+
- NMS thresh `NMS_THRESH` in yolov4_csp.cpp
14+
- bbox confidence threshold `BBOX_CONF_THRESH` in yolov4_csp.cpp
15+
- `BATCH_SIZE` in yolov4_csp.cpp
16+
17+
## How to run
18+
19+
1. Generate yolov4_csp.wts from the PyTorch implementation using yolov4-csp.cfg and yolov4-csp.weights.
20+
21+
```
22+
git clone https://github.com/wang-xinyu/tensorrtx.git
23+
git clone -b yolov4-csp https://github.com/WongKinYiu/ScaledYOLOv4.git
24+
// download yolov4-csp.weights from https://github.com/WongKinYiu/ScaledYOLOv4/tree/yolov4-csp#yolov4-csp
25+
cp {tensorrtx}/scaled-yolov4/gen_wts.py {ScaledYOLOv4/}
26+
cd {ScaledYOLOv4/}
27+
python gen_wts.py yolov4-csp.weights
28+
// a file 'yolov4_csp.wts' will be generated.
29+
```
30+
31+
2. Put yolov4_csp.wts into {tensorrtx}/scaled-yolov4, then build and run.
32+
33+
```
34+
mv yolov4_csp.wts {tensorrtx}/scaled-yolov4/
35+
cd {tensorrtx}/scaled-yolov4
36+
mkdir build
37+
cd build
38+
cmake ..
39+
make
40+
sudo ./yolov4csp -s // serialize model to plan file i.e. 'yolov4csp.engine'
41+
sudo ./yolov4csp -d ../../yolov3-spp/samples // deserialize plan file and run inference, the images in samples will be processed.
42+
```
43+
44+
3. Check the generated images, e.g. _zidane.jpg and _bus.jpg, as shown below.
45+
<p align="center">
46+
<img src= https://user-images.githubusercontent.com/39617050/117172509-824cf980-ade9-11eb-8e4c-27dbe658e355.jpg>
47+
</p>
48+
49+
<p align="center">
50+
<img src= https://user-images.githubusercontent.com/39617050/117172880-dbb52880-ade9-11eb-839a-0814fd46198e.jpg>
51+
</p>
52+
53+
54+
## More Information
55+
56+
See the readme in [home page.](https://github.com/wang-xinyu/tensorrtx)

scaled-yolov4/common.hpp

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
#include <fstream>
2+
#include <map>
3+
#include <sstream>
4+
#include <vector>
5+
#include <opencv2/opencv.hpp>
6+
7+
#include "NvInfer.h"
8+
#include "yololayer.h"
9+
#include "mish.h"
10+
11+
12+
using namespace nvinfer1;
13+
14+
// Letterboxes `img` into a Yolo::INPUT_W x Yolo::INPUT_H 8-bit 3-channel canvas.
// The image is resized with one common ratio (the smaller of the width and
// height ratios), centred along the other axis, and the remaining area is
// filled with the value (128, 128, 128).
cv::Mat preprocess_img(cv::Mat& img) {
    const float ratio_w = Yolo::INPUT_W / (img.cols*1.0);
    const float ratio_h = Yolo::INPUT_H / (img.rows*1.0);

    // Start from "fills the whole canvas" and shrink the axis that must be padded.
    int scaled_w = Yolo::INPUT_W;
    int scaled_h = Yolo::INPUT_H;
    int off_x = 0;
    int off_y = 0;
    if (ratio_h > ratio_w) {
        // Width is the limiting side: pad above and below.
        scaled_h = ratio_w * img.rows;
        off_y = (Yolo::INPUT_H - scaled_h) / 2;
    } else {
        // Height is the limiting side: pad left and right.
        scaled_w = ratio_h* img.cols;
        off_x = (Yolo::INPUT_W - scaled_w) / 2;
    }

    cv::Mat resized(scaled_h, scaled_w, CV_8UC3);
    cv::resize(img, resized, resized.size());

    cv::Mat canvas(Yolo::INPUT_H, Yolo::INPUT_W, CV_8UC3, cv::Scalar(128, 128, 128));
    resized.copyTo(canvas(cv::Rect(off_x, off_y, resized.cols, resized.rows)));
    return canvas;
}
35+
36+
// Maps a box given as (center x, center y, width, height) in network-input
// coordinates back to a cv::Rect in the coordinates of `img`, undoing the
// padding offset and the scale applied by the letterbox preprocessing.
// Intermediate edge values are truncated to int, as in the rest of this file.
cv::Rect get_rect(cv::Mat& img, float bbox[4]) {
    const float ratio_w = Yolo::INPUT_W / (img.cols * 1.0);
    const float ratio_h = Yolo::INPUT_H / (img.rows * 1.0);

    int left, right, top, bottom;
    float scale;
    if (ratio_h > ratio_w) {
        // Image was padded vertically: remove the vertical offset.
        const auto pad_y = (Yolo::INPUT_H - ratio_w * img.rows) / 2;
        left = bbox[0] - bbox[2]/2.f;
        right = bbox[0] + bbox[2]/2.f;
        top = bbox[1] - bbox[3]/2.f - pad_y;
        bottom = bbox[1] + bbox[3]/2.f - pad_y;
        scale = ratio_w;
    } else {
        // Image was padded horizontally: remove the horizontal offset.
        const auto pad_x = (Yolo::INPUT_W - ratio_h * img.cols) / 2;
        left = bbox[0] - bbox[2]/2.f - pad_x;
        right = bbox[0] + bbox[2]/2.f - pad_x;
        top = bbox[1] - bbox[3]/2.f;
        bottom = bbox[1] + bbox[3]/2.f;
        scale = ratio_h;
    }

    // Undo the resize ratio.
    left = left / scale;
    right = right / scale;
    top = top / scale;
    bottom = bottom / scale;
    return cv::Rect(left, top, right - left, bottom - top);
}
61+
62+
// Intersection-over-union of two boxes given as
// (center x, center y, width, height).
// Returns 0 when the boxes do not overlap, and also when the union area is
// not positive (e.g. two zero-sized boxes), which would otherwise divide
// 0 by 0 and yield NaN.
float iou(float lbox[4], float rbox[4]) {
    const float left   = std::max(lbox[0] - lbox[2]/2.f, rbox[0] - rbox[2]/2.f);
    const float right  = std::min(lbox[0] + lbox[2]/2.f, rbox[0] + rbox[2]/2.f);
    const float top    = std::max(lbox[1] - lbox[3]/2.f, rbox[1] - rbox[3]/2.f);
    const float bottom = std::min(lbox[1] + lbox[3]/2.f, rbox[1] + rbox[3]/2.f);

    // Disjoint boxes: the intersection rectangle is inverted on some axis.
    if (top > bottom || left > right)
        return 0.0f;

    const float interArea = (right - left) * (bottom - top);
    const float unionArea = lbox[2]*lbox[3] + rbox[2]*rbox[3] - interArea;
    // Degenerate boxes: avoid 0/0.
    if (unionArea <= 0.0f)
        return 0.0f;
    return interArea / unionArea;
}
76+
77+
// Comparator for std::sort: puts detections with higher det_confidence first.
bool cmp(const Yolo::Detection& a, const Yolo::Detection& b) {
    return b.det_confidence < a.det_confidence;
}
80+
81+
// Per-class non-maximum suppression.
//
// `output` is read as: output[0] = number of detections, followed by that many
// records of sizeof(Yolo::Detection)/sizeof(float) floats each. At most
// Yolo::MAX_OUTPUT_BBOX_COUNT records are read. Records whose float at offset 4
// (presumably det_confidence - confirm against Yolo::Detection) is <= conf_thresh
// are dropped.
//
// Surviving detections are grouped by class_id, sorted by descending confidence,
// and appended to `res`; a detection is skipped when its IoU with an already
// kept detection of the same class exceeds nms_thresh.
void nms(std::vector<Yolo::Detection>& res, float *output, float conf_thresh, float nms_thresh = 0.5) {
    const int det_size = sizeof(Yolo::Detection) / sizeof(float);

    // Bucket the confident detections by class; operator[] creates the bucket on first use.
    std::map<float, std::vector<Yolo::Detection>> per_class;
    for (int i = 0; i < output[0] && i < Yolo::MAX_OUTPUT_BBOX_COUNT; i++) {
        const float* record = &output[1 + det_size * i];
        if (record[4] <= conf_thresh) continue;
        Yolo::Detection det;
        memcpy(&det, record, det_size * sizeof(float));
        per_class[det.class_id].push_back(det);
    }

    for (auto& entry : per_class) {
        auto& dets = entry.second;
        std::sort(dets.begin(), dets.end(), cmp);

        // Mark suppressed boxes instead of erasing them from the vector:
        // same kept set and order, without shifting elements on every hit.
        std::vector<char> suppressed(dets.size(), 0);
        for (size_t i = 0; i < dets.size(); ++i) {
            if (suppressed[i]) continue;
            res.push_back(dets[i]);
            for (size_t j = i + 1; j < dets.size(); ++j) {
                if (!suppressed[j] && iou(dets[i].bbox, dets[j].bbox) > nms_thresh) {
                    suppressed[j] = 1;
                }
            }
        }
    }
}
107+
108+
// TensorRT weight files have a simple space delimited format:
109+
// [type] [size] <data x size in hex>
110+
std::map<std::string, Weights> loadWeights(const std::string file) {
111+
std::cout << "Loading weights: " << file << std::endl;
112+
std::map<std::string, Weights> weightMap;
113+
114+
// Open weights file
115+
std::ifstream input(file);
116+
assert(input.is_open() && "Unable to load weight file.");
117+
118+
// Read number of weight blobs
119+
int32_t count;
120+
input >> count;
121+
assert(count > 0 && "Invalid weight map file.");
122+
123+
while (count--)
124+
{
125+
Weights wt{DataType::kFLOAT, nullptr, 0};
126+
uint32_t size;
127+
128+
// Read name and type of blob
129+
std::string name;
130+
input >> name >> std::dec >> size;
131+
wt.type = DataType::kFLOAT;
132+
133+
// Load blob
134+
uint32_t* val = reinterpret_cast<uint32_t*>(malloc(sizeof(val) * size));
135+
for (uint32_t x = 0, y = size; x < y; ++x)
136+
{
137+
input >> std::hex >> val[x];
138+
}
139+
wt.values = val;
140+
141+
wt.count = size;
142+
weightMap[name] = wt;
143+
}
144+
145+
return weightMap;
146+
}
147+
148+
// Adds a batch-norm layer expressed as a per-channel scale layer on `input`.
//
// Reads lname + ".weight" / ".bias" / ".running_mean" / ".running_var" from
// weightMap and computes, per channel i:
//   scale[i] = gamma[i] / sqrt(var[i] + eps)
//   shift[i] = beta[i] - mean[i] * gamma[i] / sqrt(var[i] + eps)
//   power[i] = 1
// The three malloc'ed arrays are also stored in weightMap under
// lname + ".scale" / ".shift" / ".power".
IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, std::string lname, float eps) {
    const float* gamma = static_cast<const float*>(weightMap[lname + ".weight"].values);
    const float* beta = static_cast<const float*>(weightMap[lname + ".bias"].values);
    const float* mean = static_cast<const float*>(weightMap[lname + ".running_mean"].values);
    const float* var = static_cast<const float*>(weightMap[lname + ".running_var"].values);
    int channels = weightMap[lname + ".running_var"].count;

    float* scale_vals = reinterpret_cast<float*>(malloc(sizeof(float) * channels));
    float* shift_vals = reinterpret_cast<float*>(malloc(sizeof(float) * channels));
    float* power_vals = reinterpret_cast<float*>(malloc(sizeof(float) * channels));

    // One pass fills all three arrays; the denominator is evaluated once per channel.
    for (int c = 0; c < channels; ++c) {
        const auto denom = sqrt(var[c] + eps);
        scale_vals[c] = gamma[c] / denom;
        shift_vals[c] = beta[c] - mean[c] * gamma[c] / denom;
        power_vals[c] = 1.0;
    }

    Weights scale{DataType::kFLOAT, scale_vals, channels};
    Weights shift{DataType::kFLOAT, shift_vals, channels};
    Weights power{DataType::kFLOAT, power_vals, channels};

    weightMap[lname + ".scale"] = scale;
    weightMap[lname + ".shift"] = shift;
    weightMap[lname + ".power"] = power;

    IScaleLayer* bn_layer = network->addScale(input, ScaleMode::kCHANNEL, shift, scale, power);
    assert(bn_layer);
    return bn_layer;
}
180+
181+
// Appends convolution -> batch norm -> Mish to `network` and returns the Mish layer.
//
// The convolution has `outch` output maps, a ksize x ksize kernel, stride `s`
// and padding `p`, and is given empty weights as its last argument. Weights are
// looked up in weightMap under "module_list.<linx>.Conv2d.weight" and
// "module_list.<linx>.BatchNorm2d.*". The batch norm uses eps = 1e-4. The
// activation is the "Mish_TRT" version "1" plugin fetched from the plugin registry.
ILayer* convBnMish(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int outch, int ksize, int s, int p, int linx) {
    const std::string module_prefix = "module_list." + std::to_string(linx);

    // Convolution
    Weights no_bias{DataType::kFLOAT, nullptr, 0};
    IConvolutionLayer* conv = network->addConvolutionNd(input, outch, DimsHW{ksize, ksize}, weightMap[module_prefix + ".Conv2d.weight"], no_bias);
    assert(conv);
    conv->setStrideNd(DimsHW{s, s});
    conv->setPaddingNd(DimsHW{p, p});

    // Batch norm on the convolution output
    IScaleLayer* bn = addBatchNorm2d(network, weightMap, *conv->getOutput(0), module_prefix + ".BatchNorm2d", 1e-4);

    // Mish activation via the registered plugin
    auto mish_creator = getPluginRegistry()->getPluginCreator("Mish_TRT", "1");
    const PluginFieldCollection* fields = mish_creator->getFieldNames();
    const std::string plugin_name = "mish" + std::to_string(linx);
    IPluginV2* mish_plugin = mish_creator->createPlugin(plugin_name.c_str(), fields);

    ITensor* mish_input = bn->getOutput(0);
    return network->addPluginV2(&mish_input, 1, *mish_plugin);
}

scaled-yolov4/gen_wts.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import struct
2+
import sys
3+
from models.models import *
4+
from utils import *
5+
6+
model = Darknet('models/yolov4-csp.cfg', (512, 512))
7+
weights = sys.argv[1]
8+
device = torch_utils.select_device('0')
9+
if weights.endswith('.pt'): # pytorch format
10+
model.load_state_dict(torch.load(weights, map_location=device)['model'])
11+
else: # darknet format
12+
load_darknet_weights(model, weights)
13+
14+
with open('yolov4_csp.wts', 'w') as f:
15+
f.write('{}\n'.format(len(model.state_dict().keys())))
16+
for k, v in model.state_dict().items():
17+
vr = v.reshape(-1).cpu().numpy()
18+
f.write('{} {} '.format(k, len(vr)))
19+
for vv in vr:
20+
f.write(' ')
21+
f.write(struct.pack('>f',float(vv)).hex())
22+
f.write('\n')
23+

0 commit comments

Comments
 (0)