 import torch
 import torchvision
 
-
 INPUT_W = 608
 INPUT_H = 608
-CONF_THRESH = 0.5
+CONF_THRESH = 0.1
 IOU_THRESHOLD = 0.4
 
 
 def plot_one_box(x, img, color=None, label=None, line_thickness=None):
-    '''
+    """
     description: Plots one bounding box on image img,
                  this function comes from YoLov5 project.
     param:
@@ -36,9 +35,10 @@ def plot_one_box(x, img, color=None, label=None, line_thickness=None):
     return:
         no return
 
-    '''
-    tl = line_thickness or round(
-        0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
+    """
+    tl = (
+        line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1
+    )  # line/font thickness
     color = color or [random.randint(0, 255) for _ in range(3)]
     c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
     cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
@@ -47,14 +47,23 @@ def plot_one_box(x, img, color=None, label=None, line_thickness=None):
         t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
         c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
         cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
-        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3,
-                    [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
+        cv2.putText(
+            img,
+            label,
+            (c1[0], c1[1] - 2),
+            0,
+            tl / 3,
+            [225, 255, 255],
+            thickness=tf,
+            lineType=cv2.LINE_AA,
+        )
 
 
 class YoLov5TRT(object):
-    '''
+    """
     description: A YOLOv5 class that wraps TensorRT ops, preprocess and postprocess ops.
-    '''
+    """
+
     def __init__(self, engine_file_path):
         # Create a Context on this device,
         self.cfx = cuda.Device(0).make_context()
@@ -74,8 +83,7 @@ def __init__(self, engine_file_path):
         bindings = []
 
         for binding in engine:
-            size = trt.volume(engine.get_binding_shape(
-                binding)) * engine.max_batch_size
+            size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
             dtype = trt.nptype(engine.get_binding_dtype(binding))
             # Allocate host and device buffers
             host_mem = cuda.pagelocked_empty(size, dtype)
@@ -102,7 +110,7 @@ def __init__(self, engine_file_path):
 
     def infer(self, input_image_path):
         threading.Thread.__init__(self)
-        # Make self the active context, pushing it on top of the context stack.
+        # Make self the active context, pushing it on top of the context stack.
         self.cfx.push()
         # Restore
         stream = self.stream
@@ -115,7 +123,8 @@ def infer(self, input_image_path):
         bindings = self.bindings
         # Do image preprocess
         input_image, image_raw, origin_h, origin_w = self.preprocess_image(
-            input_image_path)
+            input_image_path
+        )
         # Copy input image to host buffer
         np.copyto(host_inputs[0], input_image.ravel())
         # Transfer input data to the GPU.
@@ -132,23 +141,29 @@ def infer(self, input_image_path):
         output = host_outputs[0]
         # Do postprocess
         result_boxes, result_scores, result_classid = self.post_process(
-            output, origin_h, origin_w)
+            output, origin_h, origin_w
+        )
         # Draw rectangles and labels on the original image
         for i in range(len(result_boxes)):
             box = result_boxes[i]
-            plot_one_box(box, image_raw, label="{}:{:.2f}".format(
-                categories[int(result_classid[i])], result_scores[i]))
+            plot_one_box(
+                box,
+                image_raw,
+                label="{}:{:.2f}".format(
+                    categories[int(result_classid[i])], result_scores[i]
+                ),
+            )
         parent, filename = os.path.split(input_image_path)
-        save_name = os.path.join(parent, "output_" + filename)
-        # Save image
+        save_name = os.path.join(parent, "output_" + filename)
+        # Save image
         cv2.imwrite(save_name, image_raw)
 
     def destory(self):
         # Remove any context from the top of the context stack, deactivating it.
         self.cfx.pop()
 
     def preprocess_image(self, input_image_path):
-        '''
+        """
         description: Read an image from image path, convert it to RGB,
                      resize and pad it to target size, normalize to [0,1],
                      transform to NCHW format.
@@ -159,7 +174,7 @@ def preprocess_image(self, input_image_path):
             image_raw: the original image
             h: original height
             w: original width
-        '''
+        """
         image_raw = cv2.imread(input_image_path)
         h, w, c = image_raw.shape
         image = cv2.cvtColor(image_raw, cv2.COLOR_BGR2RGB)
@@ -169,18 +184,21 @@ def preprocess_image(self, input_image_path):
         if r_h > r_w:
             tw = INPUT_W
             th = int(r_w * h)
-            tx = 0
-            ty = int((INPUT_H - th) / 2)
+            tx1 = tx2 = 0
+            ty1 = int((INPUT_H - th) / 2)
+            ty2 = INPUT_H - th - ty1
         else:
             tw = int(r_h * w)
             th = INPUT_H
-            tx = int((INPUT_W - tw) / 2)
-            ty = 0
+            tx1 = int((INPUT_W - tw) / 2)
+            tx2 = INPUT_W - tw - tx1
+            ty1 = ty2 = 0
         # Resize the image with long side while maintaining ratio
         image = cv2.resize(image, (tw, th))
         # Pad the short side with (128,128,128)
         image = cv2.copyMakeBorder(
-            image, ty, ty, tx, tx, cv2.BORDER_CONSTANT, (128, 128, 128))
+            image, ty1, ty2, tx1, tx2, cv2.BORDER_CONSTANT, (128, 128, 128)
+        )
         image = image.astype(np.float32)
         # Normalize to [0,1]
         image /= 255.0
@@ -193,36 +211,35 @@ def preprocess_image(self, input_image_path):
         return image, image_raw, h, w
 
     def xywh2xyxy(self, origin_h, origin_w, x):
-        '''
+        """
         description: Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
         param:
             origin_h: height of original image
             origin_w: width of original image
             x: A boxes tensor, each row is a box [center_x, center_y, w, h]
         return:
             y: A boxes tensor, each row is a box [x1, y1, x2, y2]
-        '''
-        y = torch.zeros_like(x) if isinstance(
-            x, torch.Tensor) else np.zeros_like(x)
+        """
+        y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
         r_w = INPUT_W / origin_w
         r_h = INPUT_H / origin_h
         if r_h > r_w:
-            y[:, 0] = x[:, 0] - x[:, 2]/2
-            y[:, 2] = x[:, 0] + x[:, 2]/2
-            y[:, 1] = x[:, 1] - x[:, 3]/2 - (INPUT_H - r_w * origin_h) / 2
-            y[:, 3] = x[:, 1] + x[:, 3]/2 - (INPUT_H - r_w * origin_h) / 2
+            y[:, 0] = x[:, 0] - x[:, 2] / 2
+            y[:, 2] = x[:, 0] + x[:, 2] / 2
+            y[:, 1] = x[:, 1] - x[:, 3] / 2 - (INPUT_H - r_w * origin_h) / 2
+            y[:, 3] = x[:, 1] + x[:, 3] / 2 - (INPUT_H - r_w * origin_h) / 2
             y /= r_w
         else:
-            y[:, 0] = x[:, 0] - x[:, 2]/2 - (INPUT_W - r_h * origin_w) / 2
-            y[:, 2] = x[:, 0] + x[:, 2]/2 - (INPUT_W - r_h * origin_w) / 2
-            y[:, 1] = x[:, 1] - x[:, 3]/2
-            y[:, 3] = x[:, 1] + x[:, 3]/2
+            y[:, 0] = x[:, 0] - x[:, 2] / 2 - (INPUT_W - r_h * origin_w) / 2
+            y[:, 2] = x[:, 0] + x[:, 2] / 2 - (INPUT_W - r_h * origin_w) / 2
+            y[:, 1] = x[:, 1] - x[:, 3] / 2
+            y[:, 3] = x[:, 1] + x[:, 3] / 2
             y /= r_h
 
         return y
 
     def post_process(self, output, origin_h, origin_w):
-        '''
+        """
         description: postprocess the prediction
         param:
             output: A tensor like [num_boxes,cx,cy,w,h,conf,cls_id, cx,cy,w,h,conf,cls_id, ...]
@@ -232,7 +249,7 @@ def post_process(self, output, origin_h, origin_w):
             result_boxes: final boxes, a boxes tensor, each row is a box [x1, y1, x2, y2]
             result_scores: final scores, a tensor, each element is the score corresponding to the box
             result_classid: final classid, a tensor, each element is the classid corresponding to the box
-        '''
+        """
         # Get the num of boxes detected
         num = int(output[0])
         # Reshape to a two-dimensional ndarray
@@ -253,8 +270,7 @@ def post_process(self, output, origin_h, origin_w):
         # Transform bbox from [center_x, center_y, w, h] to [x1, y1, x2, y2]
         boxes = self.xywh2xyxy(origin_h, origin_w, boxes)
         # Do nms
-        indices = torchvision.ops.nms(
-            boxes, scores, iou_threshold=IOU_THRESHOLD).cpu()
+        indices = torchvision.ops.nms(boxes, scores, iou_threshold=IOU_THRESHOLD).cpu()
         result_boxes = boxes[indices, :].cpu()
         result_scores = scores[indices].cpu()
         result_classid = classid[indices].cpu()
@@ -271,30 +287,35 @@ def run(self):
         self.func(*self.args)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     # load custom plugins
-    PLUGIN_LIBRARY = 'build/libmyplugins.so'
+    PLUGIN_LIBRARY = "build/libmyplugins.so"
     ctypes.CDLL(PLUGIN_LIBRARY)
     engine_file_path = "build/yolov5s.engine"
 
     # load coco labels
-    coco_labels = "coco_labels.txt"
-    categories = []
-    with open(coco_labels, "r") as f:
-        for line in f:
-            categories.append(line.strip())
+
+    categories = ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
+                  "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
+                  "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
+                  "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
+                  "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
+                  "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
+                  "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
+                  "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
+                  "hair drier", "toothbrush"]
 
     # a YoLov5TRT instance
-    yolov5_warpper = YoLov5TRT(engine_file_path)
+    yolov5_wrapper = YoLov5TRT(engine_file_path)
 
+    # from https://github.com/ultralytics/yolov5/tree/master/inference/images
     input_image_paths = ["zidane.jpg", "bus.jpg"]
 
     for input_image_path in input_image_paths:
         # create a new thread to do inference
-        thread1 = myThread(yolov5_warpper.infer, [input_image_path])
+        thread1 = myThread(yolov5_wrapper.infer, [input_image_path])
         thread1.start()
         thread1.join()
 
     # destory the instance
-    yolov5_warpper.destory()
-
+    yolov5_wrapper.destory()