11import os
22import os .path as osp
3- import shutil
4- import time
53
64import mmcv
75import numpy as np
86import torch
7+ import torch .distributed as dist
98from mmcv .runner import Hook , obj_from_dict
109from mmcv .parallel import scatter , collate
1110from pycocotools .cocoeval import COCOeval
@@ -29,43 +28,14 @@ def __init__(self, dataset, interval=1):
2928 'dataset must be a Dataset object or a dict, not {}' .format (
3029 type (dataset )))
3130 self .interval = interval
32- self .lock_dir = None
33-
34- def _barrier (self , rank , world_size ):
35- """Due to some issues with `torch.distributed.barrier()`, we have to
36- implement this ugly barrier function.
37- """
38- if rank == 0 :
39- for i in range (1 , world_size ):
40- tmp = osp .join (self .lock_dir , '{}.pkl' .format (i ))
41- while not (osp .exists (tmp )):
42- time .sleep (1 )
43- for i in range (1 , world_size ):
44- tmp = osp .join (self .lock_dir , '{}.pkl' .format (i ))
45- os .remove (tmp )
46- else :
47- tmp = osp .join (self .lock_dir , '{}.pkl' .format (rank ))
48- mmcv .dump ([], tmp )
49- while osp .exists (tmp ):
50- time .sleep (1 )
51-
52- def before_run (self , runner ):
53- self .lock_dir = osp .join (runner .work_dir , '.lock_map_hook' )
54- if runner .rank == 0 :
55- if osp .exists (self .lock_dir ):
56- shutil .rmtree (self .lock_dir )
57- mmcv .mkdir_or_exist (self .lock_dir )
58-
59- def after_run (self , runner ):
60- if runner .rank == 0 :
61- shutil .rmtree (self .lock_dir )
6231
6332 def after_train_epoch (self , runner ):
6433 if not self .every_n_epochs (runner , self .interval ):
6534 return
6635 runner .model .eval ()
6736 results = [None for _ in range (len (self .dataset ))]
68- prog_bar = mmcv .ProgressBar (len (self .dataset ))
37+ if runner .rank == 0 :
38+ prog_bar = mmcv .ProgressBar (len (self .dataset ))
6939 for idx in range (runner .rank , len (self .dataset ), runner .world_size ):
7040 data = self .dataset [idx ]
7141 data_gpu = scatter (
@@ -79,12 +49,13 @@ def after_train_epoch(self, runner):
7949 results [idx ] = result
8050
8151 batch_size = runner .world_size
82- for _ in range (batch_size ):
83- prog_bar .update ()
52+ if runner .rank == 0 :
53+ for _ in range (batch_size ):
54+ prog_bar .update ()
8455
8556 if runner .rank == 0 :
8657 print ('\n ' )
87- self . _barrier ( runner . rank , runner . world_size )
58+ dist . barrier ( )
8859 for i in range (1 , runner .world_size ):
8960 tmp_file = osp .join (runner .work_dir , 'temp_{}.pkl' .format (i ))
9061 tmp_results = mmcv .load (tmp_file )
@@ -96,8 +67,8 @@ def after_train_epoch(self, runner):
9667 tmp_file = osp .join (runner .work_dir ,
9768 'temp_{}.pkl' .format (runner .rank ))
9869 mmcv .dump (results , tmp_file )
99- self . _barrier ( runner . rank , runner . world_size )
100- self . _barrier ( runner . rank , runner . world_size )
70+ dist . barrier ( )
71+ dist . barrier ( )
10172
10273 def evaluate (self ):
10374 raise NotImplementedError
@@ -179,7 +150,13 @@ def evaluate(self, runner, results):
179150 cocoEval .evaluate ()
180151 cocoEval .accumulate ()
181152 cocoEval .summarize ()
182- field = '{}_mAP' .format (res_type )
183- runner .log_buffer .output [field ] = cocoEval .stats [0 ]
153+ metrics = ['mAP' , 'mAP_50' , 'mAP_75' , 'mAP_s' , 'mAP_m' , 'mAP_l' ]
154+ for i in range (len (metrics )):
155+ key = '{}_{}' .format (res_type , metrics [i ])
156+ val = float ('{:.3f}' .format (cocoEval .stats [i ]))
157+ runner .log_buffer .output [key ] = val
158+ runner .log_buffer .output ['{}_mAP_copypaste' .format (res_type )] = (
159+ '{ap[0]:.3f} {ap[1]:.3f} {ap[2]:.3f} {ap[3]:.3f} '
160+ '{ap[4]:.3f} {ap[5]:.3f}' ).format (ap = cocoEval .stats [:6 ])
184161 runner .log_buffer .ready = True
185162 os .remove (tmp_file )
0 commit comments