@@ -132,7 +132,7 @@ def parse_args(argv=None):
132
132
coco_cats_inv = {}
133
133
color_cache = defaultdict (lambda : {})
134
134
135
- def prep_display (dets_out , img , h , w , undo_transform = True , class_color = False , mask_alpha = 0.45 , fps_str = '' ):
135
+ def prep_display (dets_out , img , h , w , undo_transform = True , class_color = False , mask_alpha = 0.45 , fps_str = '' , maskiou_net = None ):
136
136
"""
137
137
Note: If undo_transform=False then im_h and im_w are allowed to be None.
138
138
"""
@@ -146,14 +146,34 @@ def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False, ma
146
146
with timer .env ('Postprocess' ):
147
147
t = postprocess (dets_out , w , h , visualize_lincomb = args .display_lincomb ,
148
148
crop_masks = args .crop ,
149
- score_threshold = args .score_threshold )
149
+ score_threshold = args .score_threshold ,
150
+ maskiou_net = maskiou_net )
150
151
torch .cuda .synchronize ()
151
152
153
+ # FIXME reduce copy
152
154
with timer .env ('Copy' ):
153
155
if cfg .eval_mask_branch :
154
156
# Masks are drawn on the GPU, so don't copy
155
- masks = t [3 ][:args .top_k ]
156
- classes , scores , boxes = [x [:args .top_k ].cpu ().numpy () for x in t [:3 ]]
157
+ masks = t [3 ]
158
+ classes , scores , boxes = [x for x in t [:3 ]]
159
+ if isinstance (scores , list ):
160
+ box_scores = scores [0 ].cpu ().numpy ()
161
+ mask_scores = scores [1 ].cpu ().numpy ()
162
+ # Re-rank predictions by mask scores
163
+ _scores = mask_scores * box_scores
164
+ idx = np .argsort (- _scores )
165
+ scores = box_scores [idx ]
166
+ classes = classes .cpu ().numpy ()[idx ]
167
+ boxes = boxes .cpu ().numpy ()[idx ]
168
+ masks = masks [idx ]
169
+ else :
170
+ scores = scores .cpu ().numpy ()
171
+ classes = classes .cpu ().numpy ()
172
+ boxes = boxes .cpu ().numpy ()
173
+ scores = scores [:args .top_k ]
174
+ classes = classes [:args .top_k ]
175
+ boxes = boxes [:args .top_k ]
176
+ masks = masks [:args .top_k ]
157
177
158
178
num_dets_to_consider = min (args .top_k , classes .shape [0 ])
159
179
for j in range (num_dets_to_consider ):
@@ -257,12 +277,20 @@ def get_color(j, on_gpu=None):
257
277
258
278
return img_numpy
259
279
260
- def prep_benchmark (dets_out , h , w ):
280
+ def prep_benchmark (dets_out , h , w , maskiou_net = None ):
261
281
with timer .env ('Postprocess' ):
262
- t = postprocess (dets_out , w , h , crop_masks = args .crop , score_threshold = args .score_threshold )
282
+ t = postprocess (dets_out , w , h , crop_masks = args .crop , score_threshold = args .score_threshold , maskiou_net = maskiou_net )
263
283
264
284
with timer .env ('Copy' ):
265
- classes , scores , boxes , masks = [x [:args .top_k ].cpu ().numpy () for x in t ]
285
+ classes , scores , boxes , masks = [x [:args .top_k ] for x in t ]
286
+ if isinstance (scores , list ):
287
+ box_scores = scores [0 ].cpu ().numpy ()
288
+ mask_scores = scores [1 ].cpu ().numpy ()
289
+ else :
290
+ scores = scores .cpu ().numpy ()
291
+ classes = classes .cpu ().numpy ()
292
+ boxes = boxes .cpu ().numpy ()
293
+ masks = masks .cpu ().numpy ()
266
294
267
295
with timer .env ('Sync' ):
268
296
# Just in case
@@ -371,7 +399,7 @@ def _bbox_iou(bbox1, bbox2, iscrowd=False):
371
399
ret = jaccard (bbox1 , bbox2 , iscrowd )
372
400
return ret .cpu ()
373
401
374
- def prep_metrics (ap_data , dets , img , gt , gt_masks , h , w , num_crowd , image_id , detections :Detections = None ):
402
+ def prep_metrics (ap_data , dets , img , gt , gt_masks , h , w , num_crowd , image_id , detections :Detections = None , maskiou_net = None ):
375
403
""" Returns a list of APs for this image, with each element being for a class """
376
404
if not args .output_coco_json :
377
405
with timer .env ('Prepare gt' ):
@@ -388,13 +416,19 @@ def prep_metrics(ap_data, dets, img, gt, gt_masks, h, w, num_crowd, image_id, de
388
416
crowd_classes , gt_classes = split (gt_classes )
389
417
390
418
with timer .env ('Postprocess' ):
391
- classes , scores , boxes , masks = postprocess (dets , w , h , crop_masks = args .crop , score_threshold = args .score_threshold )
419
+ classes , scores , boxes , masks = postprocess (dets , w , h , crop_masks = args .crop , score_threshold = args .score_threshold , maskiou_net = maskiou_net )
392
420
393
421
if classes .size (0 ) == 0 :
394
422
return
395
423
396
424
classes = list (classes .cpu ().numpy ().astype (int ))
397
- scores = list (scores .cpu ().numpy ().astype (float ))
425
+ if isinstance (scores , list ):
426
+ box_scores = list (scores [0 ].cpu ().numpy ().astype (float ))
427
+ mask_scores = list (scores [1 ].cpu ().numpy ().astype (float ))
428
+ else :
429
+ scores = list (scores .cpu ().numpy ().astype (float ))
430
+ box_scores = scores
431
+ mask_scores = scores
398
432
masks = masks .view (- 1 , h * w ).cuda ()
399
433
boxes = boxes .cuda ()
400
434
@@ -406,8 +440,8 @@ def prep_metrics(ap_data, dets, img, gt, gt_masks, h, w, num_crowd, image_id, de
406
440
for i in range (masks .shape [0 ]):
407
441
# Make sure that the bounding box actually makes sense and a mask was produced
408
442
if (boxes [i , 3 ] - boxes [i , 1 ]) * (boxes [i , 2 ] - boxes [i , 0 ]) > 0 :
409
- detections .add_bbox (image_id , classes [i ], boxes [i ,:], scores [i ])
410
- detections .add_mask (image_id , classes [i ], masks [i ,:,:], scores [i ])
443
+ detections .add_bbox (image_id , classes [i ], boxes [i ,:], box_scores [i ])
444
+ detections .add_mask (image_id , classes [i ], masks [i ,:,:], mask_scores [i ])
411
445
return
412
446
413
447
with timer .env ('Eval Setup' ):
@@ -425,8 +459,8 @@ def prep_metrics(ap_data, dets, img, gt, gt_masks, h, w, num_crowd, image_id, de
425
459
crowd_bbox_iou_cache = None
426
460
427
461
iou_types = [
428
- ('box' , lambda i ,j : bbox_iou_cache [i , j ].item (), lambda i ,j : crowd_bbox_iou_cache [i ,j ].item ()),
429
- ('mask' , lambda i ,j : mask_iou_cache [i , j ].item (), lambda i ,j : crowd_mask_iou_cache [i ,j ].item ())
462
+ ('box' , lambda i ,j : bbox_iou_cache [i , j ].item (), lambda i ,j : crowd_bbox_iou_cache [i ,j ].item (), lambda i : box_scores [ i ] ),
463
+ ('mask' , lambda i ,j : mask_iou_cache [i , j ].item (), lambda i ,j : crowd_mask_iou_cache [i ,j ].item (), lambda i : mask_scores [ i ] )
430
464
]
431
465
432
466
timer .start ('Main loop' )
@@ -437,7 +471,7 @@ def prep_metrics(ap_data, dets, img, gt, gt_masks, h, w, num_crowd, image_id, de
437
471
for iouIdx in range (len (iou_thresholds )):
438
472
iou_threshold = iou_thresholds [iouIdx ]
439
473
440
- for iou_type , iou_func , crowd_func in iou_types :
474
+ for iou_type , iou_func , crowd_func , score_func in iou_types :
441
475
gt_used = [False ] * len (gt_classes )
442
476
443
477
ap_obj = ap_data [iou_type ][iouIdx ][_class ]
@@ -461,7 +495,7 @@ def prep_metrics(ap_data, dets, img, gt, gt_masks, h, w, num_crowd, image_id, de
461
495
462
496
if max_match_idx >= 0 :
463
497
gt_used [max_match_idx ] = True
464
- ap_obj .push (scores [ i ] , True )
498
+ ap_obj .push (score_func ( i ) , True )
465
499
else :
466
500
# If the detection matches a crowd, we can just ignore it
467
501
matched_crowd = False
@@ -481,7 +515,7 @@ def prep_metrics(ap_data, dets, img, gt, gt_masks, h, w, num_crowd, image_id, de
481
515
# same result as COCOEval. There aren't even that many crowd annotations to
482
516
# begin with, but accuracy is of the utmost importance.
483
517
if not matched_crowd :
484
- ap_obj .push (scores [ i ] , False )
518
+ ap_obj .push (score_func ( i ) , False )
485
519
timer .stop ('Main loop' )
486
520
487
521
@@ -846,6 +880,7 @@ def evaluate(net:Yolact, dataset, train_mode=False):
846
880
net .detect .use_cross_class_nms = args .cross_class_nms
847
881
cfg .mask_proto_debug = args .mask_proto_debug
848
882
883
+ # TODO Currently we do not support Fast Mask Re-scoring in evalimage, evalimages, and evalvideo
849
884
if args .image is not None :
850
885
if ':' in args .image :
851
886
inp , out = args .image .split (':' )
@@ -921,13 +956,14 @@ def evaluate(net:Yolact, dataset, train_mode=False):
921
956
with timer .env ('Network Extra' ):
922
957
preds = net (batch )
923
958
959
+ maskiou_net = net .get_maskiou_net ()
924
960
# Perform the meat of the operation here depending on our mode.
925
961
if args .display :
926
- img_numpy = prep_display (preds , img , h , w )
962
+ img_numpy = prep_display (preds , img , h , w , maskiou_net = maskiou_net )
927
963
elif args .benchmark :
928
- prep_benchmark (preds , h , w )
964
+ prep_benchmark (preds , h , w , maskiou_net = maskiou_net )
929
965
else :
930
- prep_metrics (ap_data , preds , img , gt , gt_masks , h , w , num_crowd , dataset .ids [image_idx ], detections )
966
+ prep_metrics (ap_data , preds , img , gt , gt_masks , h , w , num_crowd , dataset .ids [image_idx ], detections , maskiou_net = maskiou_net )
931
967
932
968
# First couple of images take longer because we're constructing the graph.
933
969
# Since that's technically initialization, don't include those in the FPS calculations.
0 commit comments