|
| 1 | +#include "faster-api.h" |
| 2 | +#include <opencv2/highgui/highgui.hpp> |
| 3 | +#include <opencv2/imgproc/imgproc.hpp> |
| 4 | +#include <vector> |
| 5 | +#include <fstream> |
| 6 | + |
| 7 | +using std::string; |
| 8 | +using std::vector; |
| 9 | +using namespace caffe; |
| 10 | +using std::max; |
| 11 | +using std::min; |
| 12 | + |
| 13 | + |
| 14 | + |
| 15 | +cv::Mat bbox_tranform_inv(cv::Mat local_anchors, cv::Mat boxs_delta){ |
| 16 | + cv::Mat pre_box(local_anchors.rows, local_anchors.cols, CV_32FC1); |
| 17 | + for (int i = 0; i < local_anchors.rows; i++) |
| 18 | + { |
| 19 | + double pred_ctr_x, pred_ctr_y, src_ctr_x, src_ctr_y; |
| 20 | + double dst_ctr_x, dst_ctr_y, dst_scl_x, dst_scl_y; |
| 21 | + double src_w, src_h, pred_w, pred_h; |
| 22 | + src_w = local_anchors.at<float>(i, 2) - local_anchors.at<float>(i, 0) + 1; |
| 23 | + src_h = local_anchors.at<float>(i, 3) - local_anchors.at<float>(i, 1) + 1; |
| 24 | + src_ctr_x = local_anchors.at<float>(i, 0) + 0.5 * src_w; |
| 25 | + src_ctr_y = local_anchors.at<float>(i, 1) + 0.5 * src_h; |
| 26 | + |
| 27 | + dst_ctr_x = boxs_delta.at<float>(i, 0); |
| 28 | + dst_ctr_y = boxs_delta.at<float>(i, 1); |
| 29 | + dst_scl_x = boxs_delta.at<float>(i, 2); |
| 30 | + dst_scl_y = boxs_delta.at<float>(i, 3); |
| 31 | + pred_ctr_x = dst_ctr_x*src_w + src_ctr_x; |
| 32 | + pred_ctr_y = dst_ctr_y*src_h + src_ctr_y; |
| 33 | + pred_w = exp(dst_scl_x) * src_w; |
| 34 | + pred_h = exp(dst_scl_y) * src_h; |
| 35 | + |
| 36 | + pre_box.at<float>(i, 0) = pred_ctr_x - 0.5*pred_w; |
| 37 | + pre_box.at<float>(i, 1) = pred_ctr_y - 0.5*pred_h; |
| 38 | + pre_box.at<float>(i, 2) = pred_ctr_x + 0.5*pred_w; |
| 39 | + pre_box.at<float>(i, 3) = pred_ctr_y + 0.5*pred_h; |
| 40 | + } |
| 41 | + return pre_box; |
| 42 | +} |
| 43 | + |
| 44 | +void nms(std::vector<abox> &input_boxes, float nms_thresh){ |
| 45 | + std::vector<float>vArea(input_boxes.size()); |
| 46 | + for (int i = 0; i < input_boxes.size(); ++i) |
| 47 | + { |
| 48 | + vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1) |
| 49 | + * (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1); |
| 50 | + } |
| 51 | + for (int i = 0; i < input_boxes.size(); ++i) |
| 52 | + { |
| 53 | + for (int j = i + 1; j < input_boxes.size();) |
| 54 | + { |
| 55 | + float xx1 = std::max(input_boxes[i].x1, input_boxes[j].x1); |
| 56 | + float yy1 = std::max(input_boxes[i].y1, input_boxes[j].y1); |
| 57 | + float xx2 = std::min(input_boxes[i].x2, input_boxes[j].x2); |
| 58 | + float yy2 = std::min(input_boxes[i].y2, input_boxes[j].y2); |
| 59 | + float w = std::max(float(0), xx2 - xx1 + 1); |
| 60 | + float h = std::max(float(0), yy2 - yy1 + 1); |
| 61 | + float inter = w * h; |
| 62 | + float ovr = inter / (vArea[i] + vArea[j] - inter); |
| 63 | + if (ovr >= nms_thresh) |
| 64 | + { |
| 65 | + input_boxes.erase(input_boxes.begin() + j); |
| 66 | + vArea.erase(vArea.begin() + j); |
| 67 | + } |
| 68 | + else |
| 69 | + { |
| 70 | + j++; |
| 71 | + } |
| 72 | + } |
| 73 | + } |
| 74 | +} |
| 75 | + |
| 76 | + |
| 77 | + |
| 78 | + |
| 79 | +ObjectDetector::ObjectDetector(const std::string &model_file,const std::string &weights_file){ |
| 80 | +#ifdef CPU_ONLY |
| 81 | + Caffe::set_mode(Caffe::CPU); |
| 82 | +#else |
| 83 | + Caffe::set_mode(Caffe::GPU); |
| 84 | +#endif |
| 85 | + net_.reset(new Net<float>(model_file, TEST)); |
| 86 | + net_->CopyTrainedLayersFrom(weights_file); |
| 87 | + this->class_num_ = net_->blob_by_name("cls_prob")->channels(); //求得类别数+1 |
| 88 | +} |
| 89 | + |
| 90 | +//对一张图片,进行检测,将结果保存进map数据结构里,分别表示每个类别对应的目标框,如果需要分数信息,则计算分数 |
| 91 | +map<int,vector<cv::Rect> > ObjectDetector::detect(const cv::Mat& image,map<int,vector<float> >* objectScore){ |
| 92 | + |
| 93 | + if(objectScore!=NULL) //如果需要保存置信度 |
| 94 | + objectScore->clear(); |
| 95 | + |
| 96 | + float CONF_THRESH = 0.8; //置信度阈值 |
| 97 | + float NMS_THRESH = 0.3; //非极大值抑制阈值 |
| 98 | + int max_side = max(image.rows, image.cols); //分别求出图片宽和高的较大者 |
| 99 | + int min_side = min(image.rows, image.cols); |
| 100 | + float max_side_scale = float(max_side) / float(INPUT_SIZE_LONG); //分别求出缩放因子 |
| 101 | + float min_side_scale = float(min_side) / float(INPUT_SIZE_NARROW); |
| 102 | + float max_scale = max(max_side_scale, min_side_scale); |
| 103 | + |
| 104 | + float img_scale = float(1) / max_scale; |
| 105 | + int height = int(image.rows * img_scale); |
| 106 | + int width = int(image.cols * img_scale); |
| 107 | + |
| 108 | + int num_out; |
| 109 | + cv::Mat cv_resized; |
| 110 | + image.convertTo(cv_resized, CV_32FC3); |
| 111 | + cv::resize(cv_resized, cv_resized, cv::Size(width, height)); |
| 112 | + cv::Mat mean(height, width, cv_resized.type(), cv::Scalar(102.9801, 115.9465, 122.7717)); |
| 113 | + cv::Mat normalized; |
| 114 | + subtract(cv_resized, mean, normalized); |
| 115 | + |
| 116 | + float im_info[3]; |
| 117 | + im_info[0] = height; |
| 118 | + im_info[1] = width; |
| 119 | + im_info[2] = img_scale; |
| 120 | + boost::shared_ptr<Blob<float> > input_layer = net_->blob_by_name("data"); |
| 121 | + input_layer->Reshape(1, normalized.channels(), height, width); |
| 122 | + net_->Reshape(); |
| 123 | + float* input_data = input_layer->mutable_cpu_data(); |
| 124 | + vector<cv::Mat> input_channels; |
| 125 | + for (int i = 0; i < input_layer->channels(); ++i) { |
| 126 | + cv::Mat channel(height, width, CV_32FC1, input_data); |
| 127 | + input_channels.push_back(channel); |
| 128 | + input_data += height * width; |
| 129 | + } |
| 130 | + cv::split(normalized, input_channels); |
| 131 | + net_->blob_by_name("im_info")->set_cpu_data(im_info); |
| 132 | + net_->Forward(); //进行网络前向传播 |
| 133 | + |
| 134 | + |
| 135 | + int num = net_->blob_by_name("rois")->num(); //产生的 ROI 个数,比如为 13949个ROI |
| 136 | + const float *rois_data = net_->blob_by_name("rois")->cpu_data(); //维度比如为:13949*5*1*1 |
| 137 | + int num1 = net_->blob_by_name("bbox_pred")->num(); //预测的矩形框 维度为 13949*84 |
| 138 | + cv::Mat rois_box(num, 4, CV_32FC1); |
| 139 | + for (int i = 0; i < num; ++i) |
| 140 | + { |
| 141 | + rois_box.at<float>(i, 0) = rois_data[i * 5 + 1] / img_scale; |
| 142 | + rois_box.at<float>(i, 1) = rois_data[i * 5 + 2] / img_scale; |
| 143 | + rois_box.at<float>(i, 2) = rois_data[i * 5 + 3] / img_scale; |
| 144 | + rois_box.at<float>(i, 3) = rois_data[i * 5 + 4] / img_scale; |
| 145 | + } |
| 146 | + |
| 147 | + boost::shared_ptr<Blob<float> > bbox_delt_data = net_->blob_by_name("bbox_pred"); // 13949*84 |
| 148 | + boost::shared_ptr<Blob<float> > score = net_->blob_by_name("cls_prob"); // 3949*21 |
| 149 | + |
| 150 | + map<int,vector<cv::Rect> > label_objs; //每个类别,对应的检测目标框 |
| 151 | + for (int i = 1; i < class_num_; ++i){ //对每个类,进行遍历 |
| 152 | + cv::Mat bbox_delt(num, 4, CV_32FC1); |
| 153 | + for (int j = 0; j < num; ++j){ |
| 154 | + bbox_delt.at<float>(j, 0) = bbox_delt_data->data_at(j, i * 4 + 0, 0, 0); |
| 155 | + bbox_delt.at<float>(j, 1) = bbox_delt_data->data_at(j, i * 4 + 1, 0, 0); |
| 156 | + bbox_delt.at<float>(j, 2) = bbox_delt_data->data_at(j, i * 4 + 2, 0, 0); |
| 157 | + bbox_delt.at<float>(j, 3) = bbox_delt_data->data_at(j, i * 4 + 3, 0, 0); |
| 158 | + } |
| 159 | + cv::Mat box_class = bbox_tranform_inv(rois_box, bbox_delt); |
| 160 | + |
| 161 | + vector<abox> aboxes; //对于 类别i,检测出的矩形框保存在这 |
| 162 | + for (int j = 0; j < box_class.rows; ++j){ |
| 163 | + if (box_class.at<float>(j, 0) < 0) box_class.at<float>(j, 0) = 0; |
| 164 | + if (box_class.at<float>(j, 0) > (image.cols - 1)) box_class.at<float>(j, 0) = image.cols - 1; |
| 165 | + if (box_class.at<float>(j, 2) < 0) box_class.at<float>(j, 2) = 0; |
| 166 | + if (box_class.at<float>(j, 2) > (image.cols - 1)) box_class.at<float>(j, 2) = image.cols - 1; |
| 167 | + |
| 168 | + if (box_class.at<float>(j, 1) < 0) box_class.at<float>(j, 1) = 0; |
| 169 | + if (box_class.at<float>(j, 1) > (image.rows - 1)) box_class.at<float>(j, 1) = image.rows - 1; |
| 170 | + if (box_class.at<float>(j, 3) < 0) box_class.at<float>(j, 3) = 0; |
| 171 | + if (box_class.at<float>(j, 3) > (image.rows - 1)) box_class.at<float>(j, 3) = image.rows - 1; |
| 172 | + abox tmp; |
| 173 | + tmp.x1 = box_class.at<float>(j, 0); |
| 174 | + tmp.y1 = box_class.at<float>(j, 1); |
| 175 | + tmp.x2 = box_class.at<float>(j, 2); |
| 176 | + tmp.y2 = box_class.at<float>(j, 3); |
| 177 | + tmp.score = score->data_at(j, i, 0, 0); |
| 178 | + aboxes.push_back(tmp); |
| 179 | + } |
| 180 | + std::sort(aboxes.rbegin(), aboxes.rend()); |
| 181 | + nms(aboxes, NMS_THRESH); //与非极大值抑制消除对于的矩形框 |
| 182 | + for (int k = 0; k < aboxes.size();){ |
| 183 | + if (aboxes[k].score < CONF_THRESH) |
| 184 | + aboxes.erase(aboxes.begin() + k); |
| 185 | + else |
| 186 | + k++; |
| 187 | + } |
| 188 | + //################ 将类别i的所有检测框,保存 |
| 189 | + vector<cv::Rect> rect(aboxes.size()); //对于类别i,检测出的矩形框 |
| 190 | + for(int ii=0;ii<aboxes.size();++ii) |
| 191 | + rect[ii]=cv::Rect(cv::Point(aboxes[ii].x1,aboxes[ii].y1),cv::Point(aboxes[ii].x2,aboxes[ii].y2)); |
| 192 | + label_objs[i]=rect; |
| 193 | + //################ 将类别i的所有检测框的打分,保存 |
| 194 | + if(objectScore!=NULL){ //################ 将类别i的所有检测框的打分,保存 |
| 195 | + vector<float> tmp(aboxes.size()); //对于 类别i,检测出的矩形框的得分 |
| 196 | + for(int ii=0;ii<aboxes.size();++ii) |
| 197 | + tmp[ii]=aboxes[ii].score; |
| 198 | + objectScore->insert(pair<int,vector<float> >(i,tmp)); |
| 199 | + } |
| 200 | + } |
| 201 | + return label_objs; |
| 202 | +} |
0 commit comments