sporterman
diff --git a/‎lane_det/CMakeLists.txt‎
Lines changed: 26 additions & 0 deletions b/‎lane_det/CMakeLists.txt‎
Lines changed: 26 additions & 0 deletions
diff --git a/‎lane_det/README.md‎
Lines changed: 35 additions & 0 deletions b/‎lane_det/README.md‎
Lines changed: 35 additions & 0 deletions
diff --git a/‎lane_det/common.hpp‎
Lines changed: 184 additions & 0 deletions b/‎lane_det/common.hpp‎
Lines changed: 184 additions & 0 deletions
diff --git a/‎lane_det/gen_wts.py‎
Lines changed: 21 additions & 0 deletions b/‎lane_det/gen_wts.py‎
Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,26 @@
+# Build script for the lane_det TensorRT sample executable.
+cmake_minimum_required(VERSION 2.6)
+
+project(lane_det)
+
+add_definitions(-std=c++11)
+
+# Request the shared CUDA runtime; option() defaults to OFF when no value is given.
+option(CUDA_USE_STATIC_CUDA_RUNTIME OFF)
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_BUILD_TYPE Debug)
+
+find_package(CUDA REQUIRED)
+
+# The variable FindCUDA reads is CUDA_NVCC_FLAGS, and -gencode expects a single
+# "arch=...,code=..." argument. These flags only affect nvcc-compiled targets.
+set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-std=c++11;-g;-G;-gencode;arch=compute_30,code=sm_30)
+
+include_directories(${PROJECT_SOURCE_DIR}/include)
+
+find_package(OpenCV)
+# Expand the variable; without ${} a literal directory named
+# "OpenCV_INCLUDE_DIRS" would be added to the include path.
+include_directories(${OpenCV_INCLUDE_DIRS})
+
+add_executable(lane_det ${PROJECT_SOURCE_DIR}/lane_det.cpp)
+target_link_libraries(lane_det nvinfer)
+target_link_libraries(lane_det cudart)
+target_link_libraries(lane_det ${OpenCV_LIBS})
+
+add_definitions(-O2 -pthread)
+
@@ -0,0 +1,35 @@
+# Ultra-Fast-Lane-Detection
+
+The PyTorch implementation is [Ultra-Fast-Lane-Detection](https://github.com/cfzd/Ultra-Fast-Lane-Detection).
+
+## How to Run
+
+1. generate lane.wts and lane.onnx from pytorch with tusimple_18.pth
+
+  ```
+  git clone https://github.com/wang-xinyu/tensorrtx.git
+  git clone https://github.com/cfzd/Ultra-Fast-Lane-Detection.git
+  // download its weights 'tusimple_18.pth'
+  // copy tensorrtx/lane_det/gen_wts.py into Ultra-Fast-Lane-Detection/
+  // ensure the file names used in gen_wts.py are tusimple_18.pth and lane.wts
+  // go to Ultra-Fast-Lane-Detection
+  python gen_wts.py
+  // a file 'lane.wts' will be generated.
+  // then
+  python pth2onnx.py
+  // a file 'lane.onnx' will be generated.
+  ```
+
+2. build tensorrtx/lane_det and run
+
+  ```
+  mkdir build
+  cd build
+  cmake ..
+  make
+  sudo ./lane_det -s             // serialize model to plan file i.e. 'DBNet.engine'
+  sudo ./lane_det -d  ../data // deserialize plan file and run inference, the images in data will be processed.
+  ```
+
+## More Information
+1. The preprocessing and postprocessing differ from the other tensorrtx samples: preprocessing uses a different way to convert NHWC to NCHW, and postprocessing just shows the result using OpenCV rather than saving it.
+2. If you run into bugs when running inference with a batch size greater than 1, modify the code in preprocess or postprocess; it's not complicated.
+3. Some results are stored in resluts folder.
@@ -0,0 +1,184 @@
+#ifndef LANE_DET_COMMON_H_
+#define LANE_DET_COMMON_H_
+
+#include <cassert>
+#include <chrono>
+#include <cmath>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include <opencv2/opencv.hpp>
+#include "dirent.h"
+#include "NvInfer.h"
+
+// Evaluates `status` exactly once; if the result is non-zero, prints it to
+// std::cerr prefixed with "Cuda failure: " and aborts the process.
+// The do { ... } while (0) wrapper makes the expansion a single statement, so
+// the macro can be used safely inside unbraced if/else bodies.
+#define CHECK(status) \
+    do\
+    {\
+        auto ret = (status);\
+        if (ret != 0)\
+        {\
+            std::cerr << "Cuda failure: " << ret << std::endl;\
+            abort();\
+        }\
+    } while (0)
+
+using namespace nvinfer1;
+
+// Loads a TensorRT weight map from a plain-text weight file.
+//
+// File layout (whitespace delimited):
+//   <number of blobs>
+//   <name> <element count> <element 0 in hex> <element 1 in hex> ...
+// Every element is read as a hexadecimal 32-bit word into a uint32_t and the
+// blob is tagged DataType::kFLOAT.
+//
+// file: path of the weight file to read.
+// Returns a map from blob name to a Weights struct. Each `values` buffer is
+// allocated with malloc and is not freed by this function.
+// Failures (file cannot be opened, blob count <= 0, allocation failure,
+// malformed or truncated data) are reported via assert.
+std::map<std::string, Weights> loadWeights(const std::string file) {
+    std::cout << "Loading weights: " << file << std::endl;
+    std::map<std::string, Weights> weightMap;
+
+    // Open weights file
+    std::ifstream input(file);
+    assert(input.is_open() && "Unable to load weight file.");
+
+    // Read number of weight blobs
+    int32_t count = 0;
+    input >> count;
+    assert(count > 0 && "Invalid weight map file.");
+
+    while (count--)
+    {
+        Weights wt{DataType::kFLOAT, nullptr, 0};
+        uint32_t size = 0;
+
+        // Read name and element count of blob
+        std::string name;
+        input >> name >> std::dec >> size;
+        wt.type = DataType::kFLOAT;
+
+        // Load blob. Size the buffer by element (uint32_t), not by sizeof(val),
+        // which is the size of a pointer and over-allocates on 64-bit targets.
+        uint32_t* val = reinterpret_cast<uint32_t*>(malloc(sizeof(uint32_t) * size));
+        assert((val != nullptr || size == 0) && "Out of memory while loading weights.");
+        for (uint32_t x = 0, y = size; x < y; ++x)
+        {
+            input >> std::hex >> val[x];
+        }
+        // A failed extraction would otherwise leave elements uninitialized.
+        assert(!input.fail() && "Malformed or truncated weight file.");
+        wt.values = val;
+
+        wt.count = size;
+        weightMap[name] = wt;
+    }
+
+    return weightMap;
+}
+
+// Expresses a 2-D batch-norm as one per-channel TensorRT scale layer.
+//
+// Reads "<lname>.weight", "<lname>.bias", "<lname>.running_mean" and
+// "<lname>.running_var" from weightMap and computes, for every channel c:
+//   scale[c] = weight[c] / sqrt(running_var[c] + eps)
+//   shift[c] = bias[c] - running_mean[c] * weight[c] / sqrt(running_var[c] + eps)
+//   power[c] = 1
+// The three malloc'ed arrays are stored back into weightMap under
+// "<lname>.scale", "<lname>.shift" and "<lname>.power"; they are not freed here.
+// Returns the scale layer added to `network` (asserted to be non-null).
+IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, std::string lname, float eps) {
+    const float* const bnWeight = static_cast<const float*>(weightMap[lname + ".weight"].values);
+    const float* const bnBias = static_cast<const float*>(weightMap[lname + ".bias"].values);
+    const float* const runningMean = static_cast<const float*>(weightMap[lname + ".running_mean"].values);
+    const float* const runningVar = static_cast<const float*>(weightMap[lname + ".running_var"].values);
+    const int channels = weightMap[lname + ".running_var"].count;
+
+    float* const scaleValues = reinterpret_cast<float*>(malloc(sizeof(float) * channels));
+    float* const shiftValues = reinterpret_cast<float*>(malloc(sizeof(float) * channels));
+    float* const powerValues = reinterpret_cast<float*>(malloc(sizeof(float) * channels));
+
+    // One pass fills all three per-channel arrays.
+    for (int c = 0; c < channels; ++c) {
+        scaleValues[c] = bnWeight[c] / sqrt(runningVar[c] + eps);
+        shiftValues[c] = bnBias[c] - runningMean[c] * bnWeight[c] / sqrt(runningVar[c] + eps);
+        powerValues[c] = 1.0;
+    }
+
+    Weights scale{DataType::kFLOAT, scaleValues, channels};
+    Weights shift{DataType::kFLOAT, shiftValues, channels};
+    Weights power{DataType::kFLOAT, powerValues, channels};
+
+    // Keep the derived buffers reachable through the weight map.
+    weightMap[lname + ".scale"] = scale;
+    weightMap[lname + ".shift"] = shift;
+    weightMap[lname + ".power"] = power;
+
+    IScaleLayer* bnLayer = network->addScale(input, ScaleMode::kCHANNEL, shift, scale, power);
+    assert(bnLayer);
+    return bnLayer;
+}
+
+// Adds a square convolution, an optional batch-norm, and a ReLU activation.
+//
+// Convolution weights are looked up in weightMap as
+// "<lname>.conv<i>.weight" and "<lname>.conv<i>.bias"; when use_bn is true the
+// batch-norm parameters are taken from "<lname>.batchnorm<i>" with eps 1e-5.
+// outch: number of output channels; ksize: kernel height and width;
+// s: stride; p: padding; g: number of groups.
+// Note: despite the function name, the activation added is
+// ActivationType::kRELU.
+// Returns the activation layer (asserted to be non-null).
+ILayer* convBnLeaky( INetworkDefinition *network, std::map<std::string, Weights>& weightMap,
+                     ITensor& input, int outch, int ksize, int s, int p, int g,
+                     std::string lname, int i, bool use_bn = false )
+{
+    const std::string index = std::to_string(i);
+    const std::string convKey = lname + ".conv" + index;
+
+    IConvolutionLayer* conv = network->addConvolution(
+        input, outch, DimsHW{ ksize, ksize },
+        weightMap[convKey + ".weight"], weightMap[convKey + ".bias"]);
+    assert(conv);
+    conv->setStride(DimsHW{s, s});
+    conv->setPadding(DimsHW{p, p});
+    conv->setNbGroups(g);
+
+    // The activation consumes either the raw convolution output or, when
+    // requested, the batch-normalized output.
+    ITensor* activationInput = conv->getOutput(0);
+    if (use_bn)
+    {
+        IScaleLayer* bn = addBatchNorm2d(network, weightMap, *activationInput, lname + ".batchnorm" + index, 1e-5);
+        activationInput = bn->getOutput(0);
+    }
+
+    auto relu = network->addActivation(*activationInput, ActivationType::kRELU);
+    assert(relu);
+    return relu;
+}
+
+// Adds a two-convolution residual block to `network`.
+//
+// Main branch: 3x3 conv (given stride, padding 1) -> batch-norm -> ReLU ->
+// 3x3 conv (padding 1) -> batch-norm. Shortcut branch: `input` itself, or,
+// when inch != outch, a 1x1 conv with the given stride followed by batch-norm
+// ("<lname>downsample.0" / "<lname>downsample.1"). The two branches are summed
+// element-wise and passed through a final ReLU.
+// Weight names are formed by appending to `lname` directly, so `lname` must
+// already carry any separator (e.g. a trailing '.').
+// Returns the final ReLU layer (asserted to be non-null).
+IActivationLayer* basicBlock(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int inch, int outch, int stride, std::string lname) {
+    // None of the convolutions in this block has a bias term.
+    Weights noBias{ DataType::kFLOAT, nullptr, 0 };
+
+    // Main branch, first stage.
+    IConvolutionLayer* firstConv = network->addConvolution(input, outch, DimsHW{ 3, 3 }, weightMap[lname + "conv1.weight"], noBias);
+    assert(firstConv);
+    firstConv->setStride(DimsHW{ stride, stride });
+    firstConv->setPadding(DimsHW{ 1, 1 });
+    IScaleLayer* firstBn = addBatchNorm2d(network, weightMap, *firstConv->getOutput(0), lname + "bn1", 1e-5);
+    IActivationLayer* firstRelu = network->addActivation(*firstBn->getOutput(0), ActivationType::kRELU);
+    assert(firstRelu);
+
+    // Main branch, second stage.
+    IConvolutionLayer* secondConv = network->addConvolution(*firstRelu->getOutput(0), outch, DimsHW{ 3, 3 }, weightMap[lname + "conv2.weight"], noBias);
+    assert(secondConv);
+    secondConv->setPadding(DimsHW{ 1, 1 });
+    IScaleLayer* secondBn = addBatchNorm2d(network, weightMap, *secondConv->getOutput(0), lname + "bn2", 1e-5);
+
+    // Shortcut branch: identity unless the channel count changes.
+    ITensor* shortcut = &input;
+    if (inch != outch) {
+        IConvolutionLayer* projConv = network->addConvolution(input, outch, DimsHW{ 1, 1 }, weightMap[lname + "downsample.0.weight"], noBias);
+        assert(projConv);
+        projConv->setStride(DimsHW{ stride, stride });
+        IScaleLayer* projBn = addBatchNorm2d(network, weightMap, *projConv->getOutput(0), lname + "downsample.1", 1e-5);
+        shortcut = projBn->getOutput(0);
+    }
+
+    IElementWiseLayer* sum = network->addElementWise(*shortcut, *secondBn->getOutput(0), ElementWiseOperation::kSUM);
+    IActivationLayer* outRelu = network->addActivation(*sum->getOutput(0), ActivationType::kRELU);
+    assert(outRelu);
+    return outRelu;
+}
+
+// Appends the name of every entry in directory `p_dir_name` to `file_names`,
+// skipping the "." and ".." entries.
+// Only the bare entry name is stored; the directory path is not prepended.
+// Returns 0 on success, or -1 if the directory cannot be opened (in which
+// case `file_names` is left untouched).
+int read_files_in_dir(const char *p_dir_name, std::vector<std::string> &file_names) {
+    DIR *dir_handle = opendir(p_dir_name);
+    if (!dir_handle) {
+        return -1;
+    }
+
+    for (struct dirent *entry = readdir(dir_handle); entry != nullptr; entry = readdir(dir_handle)) {
+        const std::string entry_name(entry->d_name);
+        if (entry_name == "." || entry_name == "..") {
+            continue;
+        }
+        file_names.push_back(entry_name);
+    }
+
+    closedir(dir_handle);
+    return 0;
+}
+
+#endif
+
@@ -0,0 +1,21 @@
+import torch
+import struct
+#import models.crnn as crnn
+from model.model import parsingNet
+
+# Initialize
+model = parsingNet(pretrained = False, backbone='18', cls_dim = (101, 56, 4), use_aux=False).cuda()
+device = 'cpu'
+# Load model
+state_dict = torch.load('tusimple_18.pth', map_location='cpu')['model']
+model.to(device).eval()
+
+f = open('lane.wts', 'w')
+f.write('{}\n'.format(len(state_dict.keys())))
+for k, v in state_dict.items():
+    vr = v.reshape(-1).cpu().numpy()
+    f.write('{} {} '.format(k, len(vr)))
+    for vv in vr:
+        f.write(' ')
+        f.write(struct.pack('>f',float(vv)).hex())
+    f.write('\n')