fix and optimize

wang-xinyu · wang-xinyu · commit 1a8282247468 · 2021-01-29T16:20:48.000+08:00
diff --git a/README.md b/README.md
@@ -10,6 +10,7 @@ All the models are implemented in pytorch/mxnet/tensorflown first, and export a
 
 ## News
 
+- `29 Jan 2021`. U-Net added by [YuzhouPeng](https://github.com/YuzhouPeng).
 - `24 Jan 2021`. IBN-Net added by [TCHeish](https://github.com/TCHeish), PSENet optimized, YOLOv5 v4.0 INT8, etc.
 - `8 Jan 2021`. YOLOv5 s/m/l/x updated to v4.0.
 - `27 Dec 2020`. HRNet-Semantic-Segmentation added by [BaofengZan](https://github.com/BaofengZan).
@@ -24,7 +25,6 @@ All the models are implemented in pytorch/mxnet/tensorflown first, and export a
 - `28 Aug 2020`. [BaofengZan](https://github.com/BaofengZan) added a tutorial for compiling and running tensorrtx on windows.
 - `16 Aug 2020`. [upczww](https://github.com/upczww) added a python wrapper for yolov5.
 - `28 May 2020`. arcface LResNet50E-IR model from [deepinsight/insightface](https://github.com/deepinsight/insightface) implemented. We got 333fps on GTX1080.
-- `22 May 2020`. A new branch [trt4](https://github.com/wang-xinyu/tensorrtx/tree/trt4) created, which is using TensorRT 4 API. Now the master branch is using TensorRT 7 API. But only `yolov4` has been migrated to TensorRT 7 API for now. The rest will be migrated soon. And a tutorial for `migarating from TensorRT 4 to 7` provided.
 
 ## Tutorials
 
@@ -79,6 +79,7 @@ Following models are implemented.
 |[hrnet](./hrnet)| hrnet-image-classification and hrnet-semantic-segmentation, pytorch implementation from [HRNet-Image-Classification](https://github.com/HRNet/HRNet-Image-Classification) and [HRNet-Semantic-Segmentation](https://github.com/HRNet/HRNet-Semantic-Segmentation) |
 |[psenet](./psenet)| PSENet Text Detection, tensorflow implementation from [liuheng92/tensorflow_PSENet](https://github.com/liuheng92/tensorflow_PSENet) |
 |[ibnnet](./ibnnet)| IBN-Net, pytorch implementation from [XingangPan/IBN-Net](https://github.com/XingangPan/IBN-Net), ECCV2018 |
+|[unet](./unet)| U-Net, pytorch implementation from [milesial/Pytorch-UNet](https://github.com/milesial/Pytorch-UNet) |
 
 ## Model Zoo
 
diff --git a/hrnet/hrnet-image-classification/CMakeLists.txt b/hrnet/hrnet-image-classification/CMakeLists.txt
@@ -13,7 +13,7 @@ include_directories(/usr/local/cuda/include)
 link_directories(/usr/local/cuda/lib64)
 
 find_package(OpenCV)
-include_directories(OpenCV_INCLUDE_DIRS)
+include_directories(${OpenCV_INCLUDE_DIRS})  # expand the variable; a bare name would be taken as a literal directory
 
 add_executable(hrnet ${PROJECT_SOURCE_DIR}/hrnet.cpp)
 target_link_libraries(hrnet nvinfer)
diff --git a/hrnet/hrnet-semantic-segmentation/CMakeLists.txt b/hrnet/hrnet-semantic-segmentation/CMakeLists.txt
@@ -22,7 +22,7 @@ link_directories(/usr/lib/x86_64-linux-gnu/)
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED")
 
 find_package(OpenCV)
-include_directories(OpenCV_INCLUDE_DIRS)
+include_directories(${OpenCV_INCLUDE_DIRS})  # expand the variable; a bare name would be taken as a literal directory
 
 add_executable(hrnetseg ${PROJECT_SOURCE_DIR}/hrnetseg.cpp)
 target_link_libraries(hrnetseg nvinfer)
diff --git a/unet/CMakeLists.txt b/unet/CMakeLists.txt
@@ -30,5 +30,5 @@ add_definitions(-O2 -pthread)
 
 # opencv library
 find_package(OpenCV)
-include_directories(OpenCV_INCLUDE_DIRS)
-target_link_libraries(unet ${OpenCV_LIBS})
+include_directories(${OpenCV_INCLUDE_DIRS})  # expand the variable; a bare name would be taken as a literal directory
+target_link_libraries(unet ${OpenCV_LIBS})
diff --git a/yolov5/calibrator.cpp b/yolov5/calibrator.cpp
@@ -3,7 +3,7 @@
 #include <fstream>
 #include <opencv2/dnn/dnn.hpp>
 #include "calibrator.h"
-#include "cuda_runtime_api.h"
+#include "cuda_utils.h"
 #include "utils.h"
 
 Int8EntropyCalibrator2::Int8EntropyCalibrator2(int batchsize, int input_w, int input_h, const char* img_dir, const char* calib_table_name, const char* input_blob_name, bool read_cache)
diff --git a/yolov5/cuda_utils.h b/yolov5/cuda_utils.h
@@ -0,0 +1,26 @@
+#ifndef TRTX_CUDA_UTILS_H_
+#define TRTX_CUDA_UTILS_H_
+
+#include <cassert>
+#include <iostream>
+
+#include <cuda_runtime_api.h>
+
+// CUDA_CHECK(callstr): evaluates callstr once and stores it as a cudaError_t.
+// If the result is not cudaSuccess, the numeric error code and the call site
+// (__FILE__:__LINE__) are written to std::cerr, followed by assert(0).
+// NOTE: assert() expands to nothing when NDEBUG is defined, so in such builds
+// a failing call is only reported and execution continues.
+#ifndef CUDA_CHECK
+#define CUDA_CHECK(callstr)\
+    {\
+        cudaError_t error_code = (callstr);\
+        if (error_code != cudaSuccess) {\
+            std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__;\
+            assert(0);\
+        }\
+    }
+#endif  // CUDA_CHECK
+
+#endif  // TRTX_CUDA_UTILS_H_
+
diff --git a/yolov5/utils.h b/yolov5/utils.h
@@ -1,43 +1,9 @@
-#ifndef __TRT_UTILS_H_
-#define __TRT_UTILS_H_
+#ifndef TRTX_YOLOV5_UTILS_H_
+#define TRTX_YOLOV5_UTILS_H_
 
-#include <iostream>
-#include <vector>
-#include <algorithm>
-#include <cudnn.h>
 #include <dirent.h>
 #include <opencv2/opencv.hpp>
 
-#ifndef CUDA_CHECK
-
-#define CUDA_CHECK(callstr)                                                                    \
-    {                                                                                          \
-        cudaError_t error_code = callstr;                                                      \
-        if (error_code != cudaSuccess) {                                                       \
-            std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__; \
-            assert(0);                                                                         \
-        }                                                                                      \
-    }
-
-#endif
-
-namespace Tn
-{
-    template<typename T> 
-    void write(char*& buffer, const T& val)
-    {
-        *reinterpret_cast<T*>(buffer) = val;
-        buffer += sizeof(T);
-    }
-
-    template<typename T> 
-    void read(const char*& buffer, T& val)
-    {
-        val = *reinterpret_cast<const T*>(buffer);
-        buffer += sizeof(T);
-    }
-}
-
 static inline cv::Mat preprocess_img(cv::Mat& img, int input_w, int input_h) {
     int w, h, x, y;
     float r_w = input_w / (img.cols*1.0);
@@ -82,4 +48,5 @@ static inline int read_files_in_dir(const char *p_dir_name, std::vector<std::str
     return 0;
 }
 
-#endif
+#endif  // TRTX_YOLOV5_UTILS_H_
+
diff --git a/yolov5/yololayer.cu b/yolov5/yololayer.cu
@@ -1,6 +1,33 @@
 #include <assert.h>
+#include <cstring>
+#include <vector>
+#include <iostream>
 #include "yololayer.h"
-#include "utils.h"
+#include "cuda_utils.h"
+
+// Byte-cursor helpers: each call copies one T and advances the pointer by
+// sizeof(T). T is expected to be trivially copyable, as std::memcpy requires.
+namespace Tn
+{
+    // Copies the bytes of val to buffer, then advances buffer by sizeof(T).
+    // memcpy (not a reinterpret_cast store) keeps this well-defined when
+    // buffer is not aligned for T.
+    template<typename T>
+    void write(char*& buffer, const T& val)
+    {
+        std::memcpy(buffer, &val, sizeof(T));
+        buffer += sizeof(T);
+    }
+
+    // Copies sizeof(T) bytes from buffer into val, then advances buffer by
+    // sizeof(T). Uses memcpy for the same alignment reason as write().
+    template<typename T>
+    void read(const char*& buffer, T& val)
+    {
+        std::memcpy(&val, buffer, sizeof(T));
+        buffer += sizeof(T);
+    }
+}
 
 using namespace Yolo;
 
@@ -311,3 +338,4 @@ namespace nvinfer1
         return obj;
     }
 }
+
diff --git a/yolov5/yolov5.cpp b/yolov5/yolov5.cpp
@@ -1,6 +1,6 @@
 #include <iostream>
 #include <chrono>
-#include "cuda_runtime_api.h"
+#include "cuda_utils.h"
 #include "logging.h"
 #include "common.hpp"
 #include "utils.h"