yolov5 v4.0 int8

wang-xinyu · wang-xinyu · commit 95b9b3bc9e8d · 2021-01-24T13:58:06.000+08:00
diff --git a/yolov5/CMakeLists.txt b/yolov5/CMakeLists.txt
@@ -27,7 +27,7 @@ target_link_libraries(myplugins nvinfer cudart)
 find_package(OpenCV)
 include_directories(OpenCV_INCLUDE_DIRS)
 
-add_executable(yolov5 ${PROJECT_SOURCE_DIR}/yolov5.cpp)
+add_executable(yolov5 ${PROJECT_SOURCE_DIR}/calibrator.cpp ${PROJECT_SOURCE_DIR}/yolov5.cpp)
 target_link_libraries(yolov5 nvinfer)
 target_link_libraries(yolov5 cudart)
 target_link_libraries(yolov5 myplugins)
diff --git a/yolov5/README.md b/yolov5/README.md
@@ -22,9 +22,9 @@ Currently, we support yolov5 v1.0(yolov5s only), v2.0, v3.0, v3.1 and v4.0.
 
 ## How to Run, yolov5s as example
 
-```
 1. generate yolov5s.wts from pytorch with yolov5s.pt, or download .wts from model zoo
 
+```
 git clone https://github.com/wang-xinyu/tensorrtx.git
 git clone https://github.com/ultralytics/yolov5.git
 // download its weights 'yolov5s.pt'
@@ -33,9 +33,11 @@ git clone https://github.com/ultralytics/yolov5.git
 // go to ultralytics/yolov5
 python gen_wts.py
 // a file 'yolov5s.wts' will be generated.
+```
 
 2. build tensorrtx/yolov5 and run
 
+```
 // put yolov5s.wts into tensorrtx/yolov5
 // go to tensorrtx/yolov5
 // ensure the macro NET in yolov5.cpp is s
@@ -46,16 +48,28 @@ cmake ..
 make
 sudo ./yolov5 -s             // serialize model to plan file i.e. 'yolov5s.engine'
 sudo ./yolov5 -d  ../samples // deserialize plan file and run inference, the images in samples will be processed.
+```
 
 3. check the images generated, as follows. _zidane.jpg and _bus.jpg
 
 4. optional, load and run the tensorrt model in python
 
+```
 // install python-tensorrt, pycuda, etc.
 // ensure the yolov5s.engine and libmyplugins.so have been built
 python yolov5_trt.py
 ```
 
+# INT8 Quantization
+
+1. Prepare calibration images. You can randomly select 1000s of images from your train set. For coco, you can also download my calibration images `coco_calib` from [BaiduPan](https://pan.baidu.com/s/1GOm_-JobpyLMAqZWCDUhKg) pwd: a9wh
+
+2. unzip it in yolov5/build
+
+3. set the macro `USE_INT8` in yolov5.cpp and make
+
+4. serialize the model and test
+
 <p align="center">
 <img src="https://user-images.githubusercontent.com/15235574/78247927-4d9fac00-751e-11ea-8b1b-704a0aeb3fcf.jpg">
 </p>
diff --git a/yolov5/calibrator.cpp b/yolov5/calibrator.cpp
@@ -0,0 +1,80 @@
+#include <iostream>
+#include <iterator>
+#include <fstream>
+#include <opencv2/dnn/dnn.hpp>
+#include "calibrator.h"
+#include "cuda_runtime_api.h"
+#include "utils.h"
+
+// Constructs a calibrator that serves batches of images found in `img_dir`.
+//
+// batchsize         number of images per calibration batch
+// input_w, input_h  network input size; every image is letterboxed to it
+// img_dir           directory scanned once, here, for calibration images
+// calib_table_name  path of the calibration cache file to read/write
+// input_blob_name   name of the network input binding; only the pointer is
+//                   stored, so the string must outlive this object
+// read_cache        when false, readCalibrationCache() never loads the file
+//
+// Allocates a device buffer large enough for one batch of 3-channel float
+// input; it is released in the destructor.
+Int8EntropyCalibrator2::Int8EntropyCalibrator2(int batchsize, int input_w, int input_h, const char* img_dir, const char* calib_table_name, const char* input_blob_name, bool read_cache)
+    : batchsize_(batchsize)
+    , input_w_(input_w)
+    , input_h_(input_h)
+    , img_idx_(0)
+    , img_dir_(img_dir)
+    , calib_table_name_(calib_table_name)
+    , input_blob_name_(input_blob_name)
+    , read_cache_(read_cache)
+{
+    input_count_ = 3 * input_w * input_h * batchsize;
+    CUDA_CHECK(cudaMalloc(&device_input_, input_count_ * sizeof(float)));
+
+    // A missing directory is not fatal (a cached calibration table may be used
+    // instead), but it must not go unnoticed: with no images getBatch() returns
+    // false immediately and calibration would run on no data.
+    if (read_files_in_dir(img_dir, img_files_) != 0) {
+        std::cerr << "Warning: cannot read calibration image directory: " << img_dir << std::endl;
+    }
+}
+
+// Releases the device input buffer that the constructor allocated with cudaMalloc.
+Int8EntropyCalibrator2::~Int8EntropyCalibrator2()
+{
+    CUDA_CHECK(cudaFree(device_input_));
+}
+
+// Returns the batch size given at construction.
+int Int8EntropyCalibrator2::getBatchSize() const
+{
+    return batchsize_;
+}
+
+// Loads the next `batchsize_` calibration images into the device input buffer.
+//
+// bindings    output; bindings[0] is set to the device buffer holding the batch
+// names       binding names; names[0] is asserted to equal the input blob name
+// nbBindings  unused
+//
+// Returns false when fewer than a full batch of images remains or when an
+// image cannot be read; returns true after the batch has been copied to the
+// device.
+bool Int8EntropyCalibrator2::getBatch(void* bindings[], const char* names[], int nbBindings)
+{
+    if (img_idx_ + batchsize_ > static_cast<int>(img_files_.size())) {
+        return false;
+    }
+
+    // img_files_ holds bare file names. Join them to the directory with exactly
+    // one '/' so that img_dir works with or without a trailing slash.
+    std::string dir_prefix = img_dir_;
+    if (!dir_prefix.empty() && dir_prefix.back() != '/') {
+        dir_prefix += '/';
+    }
+
+    std::vector<cv::Mat> batch_imgs;
+    for (int i = img_idx_; i < img_idx_ + batchsize_; i++) {
+        std::cout << img_files_[i] << "  " << i << std::endl;
+        cv::Mat raw = cv::imread(dir_prefix + img_files_[i]);
+        if (raw.empty()){
+            std::cerr << "Fatal error: image cannot open!" << std::endl;
+            return false;
+        }
+        // Letterbox to the network input size before batching.
+        batch_imgs.push_back(preprocess_img(raw, input_w_, input_h_));
+    }
+    img_idx_ += batchsize_;
+
+    // Pack the batch into one float blob: scale by 1/255, zero mean,
+    // swapRB = true, crop = false.
+    cv::Mat blob = cv::dnn::blobFromImages(batch_imgs, 1.0 / 255.0, cv::Size(input_w_, input_h_), cv::Scalar(0, 0, 0), true, false);
+
+    CUDA_CHECK(cudaMemcpy(device_input_, blob.ptr<float>(0), input_count_ * sizeof(float), cudaMemcpyHostToDevice));
+    assert(!strcmp(names[0], input_blob_name_));
+    bindings[0] = device_input_;
+    return true;
+}
+
+// Reads the calibration table file into calib_cache_.
+//
+// length  output; set to the number of bytes loaded (0 if nothing was loaded)
+//
+// The file is read only when read_cache_ is set and the file opened
+// successfully. Returns a pointer to the cached bytes, or nullptr when the
+// cache is empty.
+const void* Int8EntropyCalibrator2::readCalibrationCache(size_t& length)
+{
+    std::cout << "reading calib cache: " << calib_table_name_ << std::endl;
+    calib_cache_.clear();
+
+    std::ifstream table_file(calib_table_name_, std::ios::binary);
+    // Keep whitespace bytes: the formatted char extraction below would skip them.
+    table_file >> std::noskipws;
+
+    const bool should_load = read_cache_ && table_file.good();
+    if (should_load)
+    {
+        std::istream_iterator<char> first(table_file);
+        std::istream_iterator<char> last;
+        std::copy(first, last, std::back_inserter(calib_cache_));
+    }
+
+    length = calib_cache_.size();
+    if (length == 0)
+    {
+        return nullptr;
+    }
+    return calib_cache_.data();
+}
+
+// Writes `length` bytes starting at `cache` to the calibration table file,
+// opened in binary mode.
+void Int8EntropyCalibrator2::writeCalibrationCache(const void* cache, size_t length)
+{
+    std::cout << "writing calib cache: " << calib_table_name_ << " size: " << length << std::endl;
+
+    const char* raw_bytes = static_cast<const char*>(cache);
+    std::ofstream table_file(calib_table_name_, std::ios::binary);
+    table_file.write(raw_bytes, length);
+}
+
diff --git a/yolov5/calibrator.h b/yolov5/calibrator.h
@@ -0,0 +1,39 @@
+#ifndef ENTROPY_CALIBRATOR_H
+#define ENTROPY_CALIBRATOR_H
+
+#include "NvInfer.h"
+#include <string>
+#include <vector>
+
+//! \class Int8EntropyCalibrator2
+//!
+//! \brief Implements Entropy calibrator 2.
+//!  CalibrationAlgoType is kENTROPY_CALIBRATION_2.
+//!
+//! Feeds batches of images read from a directory to the calibration process
+//! and reads/writes the calibration table file.
+//!
+//! The object owns a raw device buffer (allocated in the constructor, freed in
+//! the destructor), so it is non-copyable: a copy would free the same buffer
+//! twice.
+//!
+class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2
+{
+public:
+    //! \param batchsize         images per calibration batch
+    //! \param input_w           network input width
+    //! \param input_h           network input height
+    //! \param img_dir           directory containing the calibration images
+    //! \param calib_table_name  path of the calibration table file
+    //! \param input_blob_name   name of the network input binding; only the
+    //!                          pointer is stored, so it must outlive this object
+    //! \param read_cache        whether readCalibrationCache() may load the table file
+    Int8EntropyCalibrator2(int batchsize, int input_w, int input_h, const char* img_dir, const char* calib_table_name, const char* input_blob_name, bool read_cache = true);
+
+    virtual ~Int8EntropyCalibrator2();
+
+    // Owning a raw device allocation: forbid copies to avoid a double free.
+    Int8EntropyCalibrator2(const Int8EntropyCalibrator2&) = delete;
+    Int8EntropyCalibrator2& operator=(const Int8EntropyCalibrator2&) = delete;
+
+    int getBatchSize() const override;
+    bool getBatch(void* bindings[], const char* names[], int nbBindings) override;
+    const void* readCalibrationCache(size_t& length) override;
+    void writeCalibrationCache(const void* cache, size_t length) override;
+
+private:
+    int batchsize_;                      // images per batch
+    int input_w_;                        // network input width
+    int input_h_;                        // network input height
+    int img_idx_;                        // index of the next image to serve
+    std::string img_dir_;                // directory the images are read from
+    std::vector<std::string> img_files_; // file names found in img_dir_
+    size_t input_count_;                 // floats per batch: 3 * w * h * batchsize
+    std::string calib_table_name_;       // calibration table file path
+    const char* input_blob_name_;        // non-owning; expected name of binding 0
+    bool read_cache_;                    // allow loading the table file
+    void* device_input_;                 // device buffer holding one batch
+    std::vector<char> calib_cache_;      // bytes of the loaded calibration table
+};
+
+#endif // ENTROPY_CALIBRATOR_H
diff --git a/yolov5/common.hpp b/yolov5/common.hpp
@@ -6,45 +6,11 @@
 #include <sstream>
 #include <vector>
 #include <opencv2/opencv.hpp>
-#include <dirent.h>
 #include "NvInfer.h"
 #include "yololayer.h"
 
-#define CHECK(status) \
-    do\
-    {\
-        auto ret = (status);\
-        if (ret != 0)\
-        {\
-            std::cerr << "Cuda failure: " << ret << std::endl;\
-            abort();\
-        }\
-    } while (0)
-
 using namespace nvinfer1;
 
-cv::Mat preprocess_img(cv::Mat& img) {
-    int w, h, x, y;
-    float r_w = Yolo::INPUT_W / (img.cols*1.0);
-    float r_h = Yolo::INPUT_H / (img.rows*1.0);
-    if (r_h > r_w) {
-        w = Yolo::INPUT_W;
-        h = r_w * img.rows;
-        x = 0;
-        y = (Yolo::INPUT_H - h) / 2;
-    } else {
-        w = r_h * img.cols;
-        h = Yolo::INPUT_H;
-        x = (Yolo::INPUT_W - w) / 2;
-        y = 0;
-    }
-    cv::Mat re(h, w, CV_8UC3);
-    cv::resize(img, re, re.size(), 0, 0, cv::INTER_LINEAR);
-    cv::Mat out(Yolo::INPUT_H, Yolo::INPUT_W, CV_8UC3, cv::Scalar(128, 128, 128));
-    re.copyTo(out(cv::Rect(x, y, re.cols, re.rows)));
-    return out;
-}
-
 cv::Rect get_rect(cv::Mat& img, float bbox[4]) {
     int l, r, t, b;
     float r_w = Yolo::INPUT_W / (img.cols * 1.0);
@@ -290,28 +256,6 @@ ILayer* SPP(INetworkDefinition *network, std::map<std::string, Weights>& weightM
     return cv2;
 }
 
-int read_files_in_dir(const char *p_dir_name, std::vector<std::string> &file_names) {
-    DIR *p_dir = opendir(p_dir_name);
-    if (p_dir == nullptr) {
-        return -1;
-    }
-
-    struct dirent* p_file = nullptr;
-    while ((p_file = readdir(p_dir)) != nullptr) {
-        if (strcmp(p_file->d_name, ".") != 0 &&
-            strcmp(p_file->d_name, "..") != 0) {
-            //std::string cur_file_name(p_dir_name);
-            //cur_file_name += "/";
-            //cur_file_name += p_file->d_name;
-            std::string cur_file_name(p_file->d_name);
-            file_names.push_back(cur_file_name);
-        }
-    }
-
-    closedir(p_dir);
-    return 0;
-}
-
 std::vector<float> getAnchors(std::map<std::string, Weights>& weightMap)
 {
     std::vector<float> anchors_yolo;
diff --git a/yolov5/utils.h b/yolov5/utils.h
@@ -5,6 +5,8 @@
 #include <vector>
 #include <algorithm>
 #include <cudnn.h>
+#include <dirent.h>
+#include <opencv2/opencv.hpp>
 
 #ifndef CUDA_CHECK
 
@@ -21,61 +23,6 @@
 
 namespace Tn
 {
-    class Profiler : public nvinfer1::IProfiler
-    {
-    public:
-        void printLayerTimes(int itrationsTimes)
-        {
-            float totalTime = 0;
-            for (size_t i = 0; i < mProfile.size(); i++)
-            {
-                printf("%-40.40s %4.3fms\n", mProfile[i].first.c_str(), mProfile[i].second / itrationsTimes);
-                totalTime += mProfile[i].second;
-            }
-            printf("Time over all layers: %4.3f\n", totalTime / itrationsTimes);
-        }
-    private:
-        typedef std::pair<std::string, float> Record;
-        std::vector<Record> mProfile;
-
-        virtual void reportLayerTime(const char* layerName, float ms)
-        {
-            auto record = std::find_if(mProfile.begin(), mProfile.end(), [&](const Record& r){ return r.first == layerName; });
-            if (record == mProfile.end())
-                mProfile.push_back(std::make_pair(layerName, ms));
-            else
-                record->second += ms;
-        }
-    };
-
-    //Logger for TensorRT info/warning/errors
-    class Logger : public nvinfer1::ILogger
-    {
-    public:
-
-        Logger(): Logger(Severity::kWARNING) {}
-
-        Logger(Severity severity): reportableSeverity(severity) {}
-
-        void log(Severity severity, const char* msg) override
-        {
-            // suppress messages with severity enum value greater than the reportable
-            if (severity > reportableSeverity) return;
-
-            switch (severity)
-            {
-                case Severity::kINTERNAL_ERROR: std::cerr << "INTERNAL_ERROR: "; break;
-                case Severity::kERROR: std::cerr << "ERROR: "; break;
-                case Severity::kWARNING: std::cerr << "WARNING: "; break;
-                case Severity::kINFO: std::cerr << "INFO: "; break;
-                default: std::cerr << "UNKNOWN: "; break;
-            }
-            std::cerr << msg << std::endl;
-        }
-
-        Severity reportableSeverity{Severity::kWARNING};
-    };
-
     template<typename T> 
     void write(char*& buffer, const T& val)
     {
@@ -91,4 +38,48 @@ namespace Tn
     }
 }
 
-#endif
+// Letterboxes `img` into an input_w x input_h 8-bit 3-channel image.
+//
+// The image is resized with bilinear interpolation by the smaller of the two
+// axis scale factors so that it fits inside the target, then pasted centered
+// onto a canvas filled with (128, 128, 128). Returns the canvas.
+static inline cv::Mat preprocess_img(cv::Mat& img, int input_w, int input_h) {
+    const float scale_w = input_w / (img.cols*1.0);
+    const float scale_h = input_h / (img.rows*1.0);
+
+    // Start from "fills the whole target" and shrink the axis that must give way.
+    int new_w = input_w;
+    int new_h = input_h;
+    int off_x = 0;
+    int off_y = 0;
+    if (scale_h > scale_w) {
+        // Width is the limiting side: pad top and bottom.
+        new_h = scale_w * img.rows;
+        off_y = (input_h - new_h) / 2;
+    } else {
+        // Height is the limiting side: pad left and right.
+        new_w = scale_h * img.cols;
+        off_x = (input_w - new_w) / 2;
+    }
+
+    cv::Mat resized(new_h, new_w, CV_8UC3);
+    cv::resize(img, resized, resized.size(), 0, 0, cv::INTER_LINEAR);
+
+    cv::Mat canvas(input_h, input_w, CV_8UC3, cv::Scalar(128, 128, 128));
+    resized.copyTo(canvas(cv::Rect(off_x, off_y, resized.cols, resized.rows)));
+    return canvas;
+}
+
+// Appends the name of every entry in directory `p_dir_name` to `file_names`,
+// skipping "." and "..".
+//
+// Names are bare entry names without the directory prefix; entries are not
+// filtered by type. Returns 0 on success, or -1 if the directory cannot be
+// opened (in which case `file_names` is left untouched).
+static inline int read_files_in_dir(const char *p_dir_name, std::vector<std::string> &file_names) {
+    DIR *dir_handle = opendir(p_dir_name);
+    if (!dir_handle) {
+        return -1;
+    }
+
+    for (struct dirent* entry = readdir(dir_handle); entry != nullptr; entry = readdir(dir_handle)) {
+        const bool is_self = strcmp(entry->d_name, ".") == 0;
+        const bool is_parent = strcmp(entry->d_name, "..") == 0;
+        if (is_self || is_parent) {
+            continue;
+        }
+        file_names.push_back(std::string(entry->d_name));
+    }
+
+    closedir(dir_handle);
+    return 0;
+}
+
+#endif
diff --git a/yolov5/yololayer.cu b/yolov5/yololayer.cu
diff --git a/yolov5/yolov5.cpp b/yolov5/yolov5.cpp