Skip to content

Commit 772d353

Browse files
PrinceP and wang-xinyu authored
Draw label text for yolov5_seg (wang-xinyu#1197)
* convert class id to class name + added confidence + different colors for class ids already present + background box to match with yolov5 repo + added coco.txt
* Required changes: removed coco.txt and *.jpg files, updated readme
* Resolving comments
* Update utils.h
* Update yolov5_seg.cpp
* Update README.md

Co-authored-by: Wang Xinyu <[email protected]>
1 parent 7b79de4 commit 772d353

File tree

3 files changed

+90
-21
lines changed

3 files changed

+90
-21
lines changed

yolov5/README.md

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ TensorRTx inference code base for [ultralytics/yolov5](https://github.com/ultral
3030
<a href="https://github.com/triple-Mu"><img src="https://avatars.githubusercontent.com/u/92794867?s=48&v=4" width="40px;" alt=""/></a>
3131
<a href="https://github.com/xiang-wuu"><img src="https://avatars.githubusercontent.com/u/107029401?s=48&v=4" width="40px;" alt=""/></a>
3232
<a href="https://github.com/uyolo1314"><img src="https://avatars.githubusercontent.com/u/101853326?s=48&v=4" width="40px;" alt=""/></a>
33-
<a href="https://github.com/Rex-LK"><img src="https://avatars.githubusercontent.com/u/74702576?s=96&v=4" width="40px;" alt=""/></a>
33+
<a href="https://github.com/Rex-LK"><img src="https://avatars.githubusercontent.com/u/74702576?s=48&v=4" width="40px;" alt=""/></a>
34+
<a href="https://github.com/PrinceP"><img src="https://avatars.githubusercontent.com/u/10251537?s=48&v=4" width="40px;" alt=""/></a>
3435

3536
## Different versions of yolov5
3637

@@ -128,12 +129,15 @@ wget https://github.com/joannzhang00/ImageNet-dataset-classes-labels/blob/main/i
128129
# Build and serialize TensorRT engine
129130
./yolov5_seg -s yolov5s-seg.wts yolov5s-seg.engine s
130131
131-
# Run inference
132-
./yolov5_seg -d yolov5s-seg.engine ../samples
132+
# Download the labels file
133+
wget -O coco.txt https://raw.githubusercontent.com/amikelive/coco-labels/master/coco-labels-2014_2017.txt
134+
135+
# Run inference with labels file
136+
./yolov5_seg -d yolov5s-seg.engine ../samples coco.txt
133137
```
134138

135139
<p align="center">
136-
<img src="https://user-images.githubusercontent.com/15235574/208305921-0a2ee358-6550-4d36-bb86-867685bfe069.jpg" height="360px;">
140+
<img src="https://user-images.githubusercontent.com/10251537/211291625-1b912483-b6a6-4e92-80c1-434d165b6776.jpg" height="360px;">
137141
</p>
138142

139143
# INT8 Quantization

yolov5/src/utils.h

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,15 @@
33

44
#include <dirent.h>
55
#include <opencv2/opencv.hpp>
6+
#include <fstream>
7+
#include <unordered_map>
8+
#include <string>
9+
#include <sstream>
610

711
static inline cv::Mat preprocess_img(cv::Mat& img, int input_w, int input_h) {
812
int w, h, x, y;
9-
float r_w = input_w / (img.cols*1.0);
10-
float r_h = input_h / (img.rows*1.0);
13+
float r_w = input_w / (img.cols * 1.0);
14+
float r_h = input_h / (img.rows * 1.0);
1115
if (r_h > r_w) {
1216
w = input_w;
1317
h = r_w * img.rows;
@@ -26,7 +30,7 @@ static inline cv::Mat preprocess_img(cv::Mat& img, int input_w, int input_h) {
2630
return out;
2731
}
2832

29-
static inline int read_files_in_dir(const char *p_dir_name, std::vector<std::string> &file_names) {
33+
static inline int read_files_in_dir(const char* p_dir_name, std::vector<std::string>& file_names) {
3034
DIR *p_dir = opendir(p_dir_name);
3135
if (p_dir == nullptr) {
3236
return -1;
@@ -48,5 +52,42 @@ static inline int read_files_in_dir(const char *p_dir_name, std::vector<std::str
4852
return 0;
4953
}
5054

55+
// Returns a copy of `str` with leading and trailing whitespace removed.
//
// Whitespace here means space, tab, carriage return, line feed, vertical tab
// and form feed. Stripping '\r' matters for label files saved with CRLF line
// endings: std::getline only consumes the '\n', so the '\r' would otherwise
// end up inside the label text.
//
// A string that is empty or consists solely of whitespace yields an empty
// string. Interior whitespace (e.g. "traffic light") is preserved.
static inline std::string trim_leading_whitespace(const std::string& str) {
    const char* const whitespace = " \t\r\n\v\f";

    const size_t first = str.find_first_not_of(whitespace);
    if (first == std::string::npos) {
        // Nothing but whitespace (or nothing at all): the trimmed result is empty.
        return std::string();
    }

    // `first` exists, so a last non-whitespace character exists too and last >= first.
    const size_t last = str.find_last_not_of(whitespace);
    return str.substr(first, last - first + 1);
}
64+
// Src: https://stackoverflow.com/questions/16605967
// Formats `a_value` in fixed-point notation with `n` digits after the decimal
// point (two by default) and returns the resulting text.
static inline std::string to_string_with_precision(const float a_value, const int n = 2) {
    std::ostringstream formatter;
    // Equivalent to streaming std::fixed: select fixed-point float formatting.
    formatter.setf(std::ios_base::fixed, std::ios_base::floatfield);
    formatter.precision(n);
    formatter << a_value;
    return formatter.str();
}
71+
72+
static inline int read_labels(const std::string labels_filename, std::unordered_map<int, std::string>& labels_map) {
73+
74+
std::ifstream file(labels_filename);
75+
// Read each line of the file
76+
std::string line;
77+
int index = 0;
78+
while (std::getline(file, line)) {
79+
// Strip the line of any leading or trailing whitespace
80+
line = trim_leading_whitespace(line);
81+
82+
// Add the stripped line to the labels_map, using the loop index as the key
83+
labels_map[index] = line;
84+
index++;
85+
}
86+
// Close the file
87+
file.close();
88+
89+
return 0;
90+
}
91+
5192
#endif // TRTX_YOLOV5_UTILS_H_
5293

yolov5/yolov5_seg.cpp

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ void doInference(IExecutionContext& context, cudaStream_t& stream, void **buffer
155155
cudaStreamSynchronize(stream);
156156
}
157157

158-
bool parse_args(int argc, char** argv, std::string& wts, std::string& engine, float& gd, float& gw, std::string& img_dir) {
158+
bool parse_args(int argc, char** argv, std::string& wts, std::string& engine, float& gd, float& gw, std::string& img_dir, std::string& labels_filename) {
159159
if (argc < 4) return false;
160160
if (std::string(argv[1]) == "-s" && (argc == 5 || argc == 7)) {
161161
wts = std::string(argv[2]);
@@ -182,9 +182,10 @@ bool parse_args(int argc, char** argv, std::string& wts, std::string& engine, fl
182182
} else {
183183
return false;
184184
}
185-
} else if (std::string(argv[1]) == "-d" && argc == 4) {
185+
} else if (std::string(argv[1]) == "-d" && argc == 5) {
186186
engine = std::string(argv[2]);
187187
img_dir = std::string(argv[3]);
188+
labels_filename = std::string(argv[4]);
188189
} else {
189190
return false;
190191
}
@@ -216,12 +217,6 @@ std::vector<cv::Mat> process_mask(const float* proto, std::vector<Yolo::Detectio
216217
}
217218
e = 1.0f / (1.0f + expf(-e));
218219
mask_mat.at<float>(y, x) = e;
219-
// if (e > 0.5) {
220-
// // TODO(Call for PR): Use different colors for different class ids
221-
// mask_mat.at<cv::Vec3b>(y, x)[2] = 0xFF;
222-
// mask_mat.at<cv::Vec3b>(y, x)[1] = 0x38;
223-
// mask_mat.at<cv::Vec3b>(y, x)[0] = 0x38;
224-
// }
225220
}
226221
}
227222
cv::resize(mask_mat, mask_mat, cv::Size(INPUT_W, INPUT_H));
@@ -251,7 +246,7 @@ cv::Mat scale_mask(cv::Mat mask, cv::Mat img) {
251246
return res;
252247
}
253248

254-
void draw_mask_bbox(cv::Mat& img, std::vector<Yolo::Detection>& dets, std::vector<cv::Mat>& masks) {
249+
void draw_mask_bbox(cv::Mat& img, std::vector<Yolo::Detection>& dets, std::vector<cv::Mat>& masks, std::unordered_map<int, std::string>& labels_map) {
255250
static std::vector<uint32_t> colors = {0xFF3838, 0xFF9D97, 0xFF701F, 0xFFB21D, 0xCFD231, 0x48F90A,
256251
0x92CC17, 0x3DDB86, 0x1A9334, 0x00D4BB, 0x2C99A8, 0x00C2FF,
257252
0x344593, 0x6473FF, 0x0018EC, 0x8438FF, 0x520085, 0xCB38FF,
@@ -273,8 +268,23 @@ void draw_mask_bbox(cv::Mat& img, std::vector<Yolo::Detection>& dets, std::vecto
273268
}
274269

275270
cv::rectangle(img, r, bgr, 2);
276-
// TODO(Call for PR): convert class id to class name
277-
cv::putText(img, std::to_string((int)dets[i].class_id), cv::Point(r.x, r.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar::all(0xFF), 2);
271+
272+
// Get the size of the text
273+
cv::Size textSize = cv::getTextSize(labels_map[(int)dets[i].class_id] + " " + to_string_with_precision(dets[i].conf), cv::FONT_HERSHEY_PLAIN, 1.2, 2, NULL);
274+
// Set the top left corner of the rectangle
275+
cv::Point topLeft(r.x, r.y - textSize.height);
276+
277+
// Set the bottom right corner of the rectangle
278+
cv::Point bottomRight(r.x + textSize.width, r.y + textSize.height);
279+
280+
// Set the thickness of the rectangle lines
281+
int lineThickness = 2;
282+
283+
// Draw the rectangle on the image
284+
cv::rectangle(img, topLeft, bottomRight, bgr, -1);
285+
286+
cv::putText(img, labels_map[(int)dets[i].class_id] + " " + to_string_with_precision(dets[i].conf), cv::Point(r.x, r.y + 4), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar::all(0xFF), 2);
287+
278288
}
279289
}
280290

@@ -283,12 +293,14 @@ int main(int argc, char** argv) {
283293

284294
std::string wts_name = "";
285295
std::string engine_name = "";
296+
std::string labels_filename = "";
297+
286298
float gd = 0.0f, gw = 0.0f;
287299
std::string img_dir;
288-
if (!parse_args(argc, argv, wts_name, engine_name, gd, gw, img_dir)) {
300+
if (!parse_args(argc, argv, wts_name, engine_name, gd, gw, img_dir, labels_filename)) {
289301
std::cerr << "arguments not right!" << std::endl;
290302
std::cerr << "./yolov5_seg -s [.wts] [.engine] [n/s/m/l/x or c gd gw] // serialize model to plan file" << std::endl;
291-
std::cerr << "./yolov5_seg -d [.engine] ../samples // deserialize plan file and run inference" << std::endl;
303+
std::cerr << "./yolov5_seg -d [.engine] ../samples coco.txt // deserialize plan file, read the labels file and run inference" << std::endl;
292304
return -1;
293305
}
294306

@@ -328,6 +340,18 @@ int main(int argc, char** argv) {
328340
std::cerr << "read_files_in_dir failed." << std::endl;
329341
return -1;
330342
}
343+
344+
// read the txt file for classnames
345+
std::ifstream labels_file(labels_filename, std::ios::binary);
346+
if (!labels_file.good()) {
347+
std::cerr << "read " << labels_filename << " error!" << std::endl;
348+
return -1;
349+
}
350+
std::unordered_map<int, std::string> labels_map;
351+
read_labels(labels_filename, labels_map);
352+
353+
assert(CLASS_NUM == labels_map.size());
354+
331355

332356
static float prob[BATCH_SIZE * OUTPUT_SIZE1];
333357
static float proto[BATCH_SIZE * OUTPUT_SIZE2];
@@ -398,7 +422,7 @@ int main(int argc, char** argv) {
398422
cv::Mat img = imgs_buffer[b];
399423

400424
auto masks = process_mask(&proto[b * OUTPUT_SIZE2], res);
401-
draw_mask_bbox(img, res, masks);
425+
draw_mask_bbox(img, res, masks, labels_map);
402426
cv::imwrite("_" + file_names[f - fcount + 1 + b], img);
403427
}
404428
fcount = 0;

0 commit comments

Comments
 (0)