magicwifi
diff --git a/‎scaled-yolov4/CMakeLists.txt‎
Lines changed: 37 additions & 0 deletions b/‎scaled-yolov4/CMakeLists.txt‎
Lines changed: 37 additions & 0 deletions
diff --git a/‎scaled-yolov4/README.md‎
Lines changed: 56 additions & 0 deletions b/‎scaled-yolov4/README.md‎
Lines changed: 56 additions & 0 deletions
diff --git a/‎scaled-yolov4/common.hpp‎
Lines changed: 196 additions & 0 deletions b/‎scaled-yolov4/common.hpp‎
Lines changed: 196 additions & 0 deletions
diff --git a/‎scaled-yolov4/gen_wts.py‎
Lines changed: 23 additions & 0 deletions b/‎scaled-yolov4/gen_wts.py‎
Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,37 @@
+# Build script for the scaled-yolov4 (yolov4-csp) TensorRT sample: a shared
+# plugin library (myplugins) built from the CUDA sources, plus the yolov4csp
+# executable that links against it.
+cmake_minimum_required(VERSION 2.6)
+
+project(yolov4)
+
+add_definitions(-std=c++11)
+
+# option() is option(<name> "<help text>" [default]); supply a real help
+# string so that OFF is the default value rather than the description.
+option(CUDA_USE_STATIC_CUDA_RUNTIME "Link against the static CUDA runtime" OFF)
+set(CMAKE_CXX_STANDARD 11)
+# Default to Debug, but honour -DCMAKE_BUILD_TYPE=... given on the command line.
+if(NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE Debug)
+endif()
+
+find_package(CUDA REQUIRED)
+
+include_directories(${PROJECT_SOURCE_DIR}/include)
+# include and link dirs of cuda and tensorrt, you need adapt them if yours are different
+# cuda
+include_directories(/usr/local/cuda/include)
+link_directories(/usr/local/cuda/lib64)
+# tensorrt
+include_directories(/usr/include/x86_64-linux-gnu/)
+link_directories(/usr/lib/x86_64-linux-gnu/)
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED")
+
+# Plugin library holding the custom YOLO and Mish layers.
+cuda_add_library(myplugins SHARED ${PROJECT_SOURCE_DIR}/yololayer.cu ${PROJECT_SOURCE_DIR}/mish.cu)
+target_link_libraries(myplugins nvinfer cudart)
+
+# Fail at configure time, not at compile time, when OpenCV is missing.
+find_package(OpenCV REQUIRED)
+include_directories(${OpenCV_INCLUDE_DIRS})
+
+add_executable(yolov4csp ${PROJECT_SOURCE_DIR}/yolov4_csp.cpp)
+target_link_libraries(yolov4csp nvinfer)
+target_link_libraries(yolov4csp cudart)
+target_link_libraries(yolov4csp myplugins)
+target_link_libraries(yolov4csp ${OpenCV_LIBS})
+
+add_definitions(-O2 -pthread)
+
@@ -0,0 +1,56 @@
+# scaled-yolov4
+
+The PyTorch implementation is from the [WongKinYiu/ScaledYOLOv4 yolov4-csp branch](https://github.com/WongKinYiu/ScaledYOLOv4/tree/yolov4-csp). It can load yolov4-csp.cfg and yolov4-csp.weights (from AlexeyAB/darknet).
+
+Note: The darknet and PyTorch versions of yolov4-csp.cfg differ slightly. Use the one provided in the repo linked above.
+
+## Config
+
+- Input shape `INPUT_H`, `INPUT_W` defined in yololayer.h
+- Number of classes `CLASS_NUM` defined in yololayer.h
+- FP16/FP32 can be selected by the macro `USE_FP16` in yolov4_csp.cpp
+- GPU id can be selected by the macro `DEVICE` in yolov4_csp.cpp
+- NMS thresh `NMS_THRESH` in yolov4_csp.cpp
+- bbox confidence threshold `BBOX_CONF_THRESH` in yolov4_csp.cpp
+- `BATCH_SIZE` in yolov4_csp.cpp
+
+## How to run
+
+1. Generate yolov4_csp.wts from the PyTorch implementation using yolov4-csp.cfg and yolov4-csp.weights.
+
+```
+git clone https://github.com/wang-xinyu/tensorrtx.git
+git clone -b yolov4-csp https://github.com/WongKinYiu/ScaledYOLOv4.git
+// download yolov4-csp.weights from https://github.com/WongKinYiu/ScaledYOLOv4/tree/yolov4-csp#yolov4-csp
+cp {tensorrtx}/scaled-yolov4/gen_wts.py {ScaledYOLOv4/}
+cd {ScaledYOLOv4/}
+python gen_wts.py yolov4-csp.weights
+// a file 'yolov4_csp.wts' will be generated.
+```
+
+2. Put yolov4_csp.wts into {tensorrtx}/scaled-yolov4, then build and run.
+
+```
+mv yolov4_csp.wts {tensorrtx}/scaled-yolov4/
+cd {tensorrtx}/scaled-yolov4
+mkdir build
+cd build
+cmake ..
+make
+sudo ./yolov4csp -s                          // serialize model to plan file i.e. 'yolov4csp.engine'
+sudo ./yolov4csp -d ../../yolov3-spp/samples // deserialize plan file and run inference, the images in samples will be processed.
+```
+
+3. Check the generated images, _zidane.jpg and _bus.jpg. They should look like the following:
+<p align="center">
+<img src= https://user-images.githubusercontent.com/39617050/117172509-824cf980-ade9-11eb-8e4c-27dbe658e355.jpg>
+</p>
+
+<p align="center">
+<img src= https://user-images.githubusercontent.com/39617050/117172880-dbb52880-ade9-11eb-839a-0814fd46198e.jpg>
+</p>
+
+
+## More Information
+
+See the README on the [home page](https://github.com/wang-xinyu/tensorrtx).
@@ -0,0 +1,196 @@
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include <opencv2/opencv.hpp>
+
+#include "NvInfer.h"
+#include "yololayer.h"
+#include "mish.h"
+
+
+using namespace nvinfer1;
+
+// Letterboxes `img` to the network input size: the image is scaled to fit
+// inside Yolo::INPUT_W x Yolo::INPUT_H with its aspect ratio preserved,
+// centred, and the uncovered border is filled with grey (128, 128, 128).
+// Returns the new INPUT_H x INPUT_W CV_8UC3 image; `img` itself is not modified.
+cv::Mat preprocess_img(cv::Mat& img) {
+    const float r_w = Yolo::INPUT_W / (img.cols*1.0);
+    const float r_h = Yolo::INPUT_H / (img.rows*1.0);
+
+    // Start from "fills the whole canvas" and shrink/offset only the axis
+    // that is not the limiting one.
+    int scaled_w = Yolo::INPUT_W;
+    int scaled_h = Yolo::INPUT_H;
+    int offset_x = 0;
+    int offset_y = 0;
+    if (r_h > r_w) {
+        // Width is the limiting dimension: pad above and below.
+        scaled_h = r_w * img.rows;
+        offset_y = (Yolo::INPUT_H - scaled_h) / 2;
+    } else {
+        // Height is the limiting dimension: pad left and right.
+        scaled_w = r_h* img.cols;
+        offset_x = (Yolo::INPUT_W - scaled_w) / 2;
+    }
+
+    cv::Mat resized(scaled_h, scaled_w, CV_8UC3);
+    cv::resize(img, resized, resized.size());
+
+    cv::Mat canvas(Yolo::INPUT_H, Yolo::INPUT_W, CV_8UC3, cv::Scalar(128, 128, 128));
+    const cv::Rect roi(offset_x, offset_y, resized.cols, resized.rows);
+    resized.copyTo(canvas(roi));
+    return canvas;
+}
+
+// Maps a box given as {center x, center y, width, height} in network-input
+// coordinates back to pixel coordinates of the original image `img`, undoing
+// the centring offset and the scale applied by preprocess_img.
+// Intermediate edges are truncated to int before and after rescaling.
+cv::Rect get_rect(cv::Mat& img, float bbox[4]) {
+    const float r_w = Yolo::INPUT_W / (img.cols * 1.0);
+    const float r_h = Yolo::INPUT_H / (img.rows * 1.0);
+    const bool padded_vertically = r_h > r_w;
+
+    int left, right, top, bottom;
+    if (padded_vertically) {
+        // Image was scaled by r_w; the padding sits above and below it.
+        const auto pad_y = (Yolo::INPUT_H - r_w * img.rows) / 2;
+        left   = bbox[0] - bbox[2]/2.f;
+        right  = bbox[0] + bbox[2]/2.f;
+        top    = bbox[1] - bbox[3]/2.f - pad_y;
+        bottom = bbox[1] + bbox[3]/2.f - pad_y;
+    } else {
+        // Image was scaled by r_h; the padding sits left and right of it.
+        const auto pad_x = (Yolo::INPUT_W - r_h * img.cols) / 2;
+        left   = bbox[0] - bbox[2]/2.f - pad_x;
+        right  = bbox[0] + bbox[2]/2.f - pad_x;
+        top    = bbox[1] - bbox[3]/2.f;
+        bottom = bbox[1] + bbox[3]/2.f;
+    }
+
+    // Undo the resize using whichever ratio was actually applied.
+    const float scale = padded_vertically ? r_w : r_h;
+    left   = left / scale;
+    right  = right / scale;
+    top    = top / scale;
+    bottom = bottom / scale;
+
+    return cv::Rect(left, top, right - left, bottom - top);
+}
+
+// Intersection-over-union of two boxes, each given as
+// {center x, center y, width, height}. Returns 0 when the boxes do not overlap.
+float iou(float lbox[4], float rbox[4]) {
+    const float left   = std::max(lbox[0] - lbox[2]/2.f, rbox[0] - rbox[2]/2.f);
+    const float right  = std::min(lbox[0] + lbox[2]/2.f, rbox[0] + rbox[2]/2.f);
+    const float top    = std::max(lbox[1] - lbox[3]/2.f, rbox[1] - rbox[3]/2.f);
+    const float bottom = std::min(lbox[1] + lbox[3]/2.f, rbox[1] + rbox[3]/2.f);
+
+    // An inverted interval on either axis means there is no intersection.
+    if (top > bottom || left > right) {
+        return 0.0f;
+    }
+
+    const float inter_area = (right - left) * (bottom - top);
+    const float union_area = lbox[2]*lbox[3] + rbox[2]*rbox[3] - inter_area;
+    return inter_area / union_area;
+}
+
+// Ordering predicate for std::sort: places detections with higher
+// det_confidence first.
+bool cmp(const Yolo::Detection& a, const Yolo::Detection& b) {
+    return b.det_confidence < a.det_confidence;
+}
+
+// Per-class non-maximum suppression over the raw network output.
+//
+// `output[0]` is read as the number of detections; the records follow it,
+// each sizeof(Yolo::Detection)/sizeof(float) floats wide. The float at index 4
+// of a record is compared against `conf_thresh` (presumably det_confidence --
+// confirm against the Yolo::Detection layout). At most
+// Yolo::MAX_OUTPUT_BBOX_COUNT records are read.
+//
+// Surviving detections are appended to `res`, grouped by ascending class_id
+// and, within a class, ordered by descending confidence. A detection is
+// dropped when its IoU with an already-kept detection of the same class
+// exceeds `nms_thresh`.
+void nms(std::vector<Yolo::Detection>& res, float *output, float conf_thresh, float nms_thresh = 0.5) {
+    const int det_size = sizeof(Yolo::Detection) / sizeof(float);
+
+    // Bucket the confident detections by class id.
+    std::map<float, std::vector<Yolo::Detection>> per_class;
+    for (int i = 0; i < output[0] && i < Yolo::MAX_OUTPUT_BBOX_COUNT; i++) {
+        float* record = &output[1 + det_size * i];
+        if (record[4] <= conf_thresh) continue;
+        Yolo::Detection det;
+        memcpy(&det, record, det_size * sizeof(float));
+        // operator[] creates the bucket on first use; one lookup, not three.
+        per_class[det.class_id].push_back(det);
+    }
+
+    for (auto& bucket : per_class) {
+        auto& dets = bucket.second;
+        std::sort(dets.begin(), dets.end(), cmp);
+
+        // Mark suppressed entries instead of erasing them: same survivors in
+        // the same order, without shifting the vector on every suppression.
+        std::vector<char> suppressed(dets.size(), 0);
+        for (size_t i = 0; i < dets.size(); ++i) {
+            if (suppressed[i]) continue;
+            res.push_back(dets[i]);
+            for (size_t j = i + 1; j < dets.size(); ++j) {
+                if (!suppressed[j] && iou(dets[i].bbox, dets[j].bbox) > nms_thresh) {
+                    suppressed[j] = 1;
+                }
+            }
+        }
+    }
+}
+
+// TensorRT weight files have a simple space delimited format:
+//   <number of blobs>
+//   [name] [size] <data x size in hex>      (repeated once per blob)
+//
+// Reads `file` and returns a map from blob name to a Weights descriptor of
+// type kFLOAT. Each hex token is parsed into one 32-bit word, so the buffer
+// holds the raw bit patterns of the values.
+//
+// Every `values` buffer is allocated with malloc() and is NOT released here;
+// the caller owns the buffers and must free() them once they are no longer
+// needed.
+//
+// Errors (unopenable file, bad count, malformed blob, allocation failure) are
+// reported through assert(), matching the rest of this file.
+std::map<std::string, Weights> loadWeights(const std::string file) {
+    std::cout << "Loading weights: " << file << std::endl;
+    std::map<std::string, Weights> weightMap;
+
+    // Open weights file
+    std::ifstream input(file);
+    assert(input.is_open() && "Unable to load weight file.");
+
+    // Read number of weight blobs
+    int32_t count = 0;
+    input >> count;
+    assert(count > 0 && "Invalid weight map file.");
+
+    // `> 0` keeps a negative count from looping ~2^32 times when asserts are off.
+    while (count-- > 0)
+    {
+        Weights wt{DataType::kFLOAT, nullptr, 0};
+        uint32_t size = 0;
+
+        // Read name and size of blob
+        std::string name;
+        input >> name >> std::dec >> size;
+        assert(!input.fail() && "Malformed blob header in weight file.");
+        wt.type = DataType::kFLOAT;
+
+        // Load blob. Size the buffer by the element type: sizeof(val) would be
+        // the size of the pointer and over-allocate.
+        uint32_t* val = reinterpret_cast<uint32_t*>(malloc(sizeof(uint32_t) * size));
+        assert((val != nullptr || size == 0) && "Out of memory while loading weights.");
+        for (uint32_t x = 0; x < size; ++x)
+        {
+            input >> std::hex >> val[x];
+        }
+        assert(!input.fail() && "Truncated or malformed blob data in weight file.");
+        wt.values = val;
+
+        wt.count = size;
+        weightMap[name] = wt;
+    }
+
+    return weightMap;
+}
+
+// Adds a per-channel scale layer to `network` that stands in for the batch-norm
+// layer named `lname`, using its ".weight", ".bias", ".running_mean" and
+// ".running_var" entries from `weightMap`:
+//   scale = gamma / sqrt(var + eps)
+//   shift = beta - mean * gamma / sqrt(var + eps)
+//   power = 1
+// The three malloc()-allocated coefficient buffers are stored back into
+// `weightMap` under lname + ".scale" / ".shift" / ".power"; they are not freed here.
+IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, std::string lname, float eps) {
+    const float* gamma = static_cast<const float*>(weightMap[lname + ".weight"].values);
+    const float* beta  = static_cast<const float*>(weightMap[lname + ".bias"].values);
+    const float* mean  = static_cast<const float*>(weightMap[lname + ".running_mean"].values);
+    const float* var   = static_cast<const float*>(weightMap[lname + ".running_var"].values);
+    const int channels = weightMap[lname + ".running_var"].count;
+
+    float* scale_vals = reinterpret_cast<float*>(malloc(sizeof(float) * channels));
+    float* shift_vals = reinterpret_cast<float*>(malloc(sizeof(float) * channels));
+    float* power_vals = reinterpret_cast<float*>(malloc(sizeof(float) * channels));
+
+    // One pass fills all three coefficient arrays.
+    for (int c = 0; c < channels; ++c) {
+        const auto denom = sqrt(var[c] + eps);
+        scale_vals[c] = gamma[c] / denom;
+        shift_vals[c] = beta[c] - mean[c] * gamma[c] / denom;
+        power_vals[c] = 1.0;
+    }
+
+    Weights scale{DataType::kFLOAT, scale_vals, channels};
+    Weights shift{DataType::kFLOAT, shift_vals, channels};
+    Weights power{DataType::kFLOAT, power_vals, channels};
+
+    // Keep the buffers reachable through the map.
+    weightMap[lname + ".scale"] = scale;
+    weightMap[lname + ".shift"] = shift;
+    weightMap[lname + ".power"] = power;
+
+    IScaleLayer* bn_layer = network->addScale(input, ScaleMode::kCHANNEL, shift, scale, power);
+    assert(bn_layer);
+    return bn_layer;
+}
+
+// Appends a convolution -> batch-norm -> Mish block to `network` and returns
+// the Mish plugin layer.
+//
+//   outch  number of output channels of the convolution
+//   ksize  square kernel size
+//   s, p   stride and padding, applied to both spatial dimensions
+//   linx   layer index; weights are looked up under "module_list.<linx>.Conv2d"
+//          and "module_list.<linx>.BatchNorm2d", and the plugin is named "mish<linx>"
+//
+// The convolution is given an empty Weights object as its last argument
+// (presumably the bias -- confirm against the addConvolutionNd signature).
+// Failures are reported through assert(), as elsewhere in this file.
+ILayer* convBnMish(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int outch, int ksize, int s, int p, int linx) {
+    Weights emptywts{DataType::kFLOAT, nullptr, 0};
+    IConvolutionLayer* conv1 = network->addConvolutionNd(input, outch, DimsHW{ksize, ksize}, weightMap["module_list." + std::to_string(linx) + ".Conv2d.weight"], emptywts);
+    assert(conv1);
+    conv1->setStrideNd(DimsHW{s, s});
+    conv1->setPaddingNd(DimsHW{p, p});
+
+    IScaleLayer* bn1 = addBatchNorm2d(network, weightMap, *conv1->getOutput(0), "module_list." + std::to_string(linx) + ".BatchNorm2d", 1e-4);
+
+    // Without this check an unregistered plugin would be a null dereference.
+    auto creator = getPluginRegistry()->getPluginCreator("Mish_TRT", "1");
+    assert(creator && "Mish_TRT plugin creator is not registered.");
+    const PluginFieldCollection* pluginData = creator->getFieldNames();
+    IPluginV2 *pluginObj = creator->createPlugin(("mish" + std::to_string(linx)).c_str(), pluginData);
+    assert(pluginObj && "Failed to create Mish plugin.");
+    ITensor* inputTensors[] = {bn1->getOutput(0)};
+    auto mish = network->addPluginV2(&inputTensors[0], 1, *pluginObj);
+    assert(mish);
+    return mish;
+}
@@ -0,0 +1,23 @@
+import struct
+import sys
+from models.models import *
+from utils import *
+
+model = Darknet('models/yolov4-csp.cfg', (512, 512))
+weights = sys.argv[1]
+device = torch_utils.select_device('0')
+if weights.endswith('.pt'):  # pytorch format
+    model.load_state_dict(torch.load(weights, map_location=device)['model'])
+else:  # darknet format
+    load_darknet_weights(model, weights)
+
+with open('yolov4_csp.wts', 'w') as f:
+    f.write('{}\n'.format(len(model.state_dict().keys())))
+    for k, v in model.state_dict().items():
+        vr = v.reshape(-1).cpu().numpy()
+        f.write('{} {} '.format(k, len(vr)))
+        for vv in vr:
+            f.write(' ')
+            f.write(struct.pack('>f',float(vv)).hex())
+        f.write('\n')
+