update README.md

yoojy31 · yoojy31 · commit ea3472a98d64 · 2019-12-17T19:14:08.000+09:00
diff --git a/README.md b/README.md
@@ -1,7 +1,21 @@
-# mixture-model-based-object-detection
-mixture-model-based-object-detection
+Mixture-Model-based Object Detector: PyTorch Implementation
+===========================================================
 
-Enviroments:
-- python3.6
-- pytorch1.1
-- torchvision0.3
+This repository is the PyTorch implementation of "Mixture-Model-based Bounding Box Density Estimation for Object Detection".
+
+Environment
+-----------
+* python3.6
+* pytorch1.1
+* torchvision0.3
+
+Citation
+--------
+```
+@article{yoo2019mmod,
+  title={Mixture-Model-based Bounding Box Density Estimation for Object Detection},
+  author={Yoo, Jaeyoung and Seo, Geonseok and Kwak, Nojun},
+  journal={arXiv preprint arXiv:1911.12721},
+  year={2019}
+}
+```
diff --git a/src/lib/backbone.py b/src/lib/backbone.py
@@ -76,7 +76,12 @@ def build(self):
              self.net['stage_p3_1'], self.net['stage_p3_2']])
 
     def forward(self, image):
+        # import torch
+        # print('image_r', torch.min(image[0, 0]), torch.mean(image[0, 0]), torch.max(image[0, 0]))
+        # print('image_g', torch.min(image[0, 1]), torch.mean(image[0, 1]), torch.max(image[0, 1]))
+        # print('image_b', torch.min(image[0, 2]), torch.mean(image[0, 2]), torch.max(image[0, 2]))
         base_fmap = self.net['base'].forward(image)
+        # print('base_fmap', torch.min(base_fmap), torch.max(base_fmap))
         fmap_c3 = self.net['stage_c3'].forward(base_fmap)
         fmap_c4 = self.net['stage_c4'].forward(fmap_c3)
         fmap_c5 = self.net['stage_c5'].forward(fmap_c4)
diff --git a/src/lib/network_util.py b/src/lib/network_util.py
@@ -59,7 +59,7 @@ def create_def_coord(batch_size, output_sizes, coord_range):
 
 
 def create_box_coord_map(output_size, output_ch, coord_range):
-    box_coord_map = np.zeros((output_ch, 4, output_size[0], output_size[1]))
+    box_coord_map = np.zeros((output_ch, 4, output_size[0], output_size[1])).astype(np.float32)
     box_coord_map[:, :2] += lib_util.create_coord_map(output_size, coord_range)
 
     # gauss_ch: 4 --> ((0, 1, 2, 3), ...)
diff --git a/src/lib/post_proc.py b/src/lib/post_proc.py
@@ -23,12 +23,12 @@ def __filter_cls_boxes_s__(self, boxes_s, confs_s, pi_s):
         for c in range(self.n_classes - 1):
             cls_boxes_sc = boxes_s[c]
             cls_confs_sc = confs_s[c]
-            cls_pi_sc = norm_pi_s.clone()
+            # cls_pi_sc = norm_pi_s.clone()
 
             if len(cls_boxes_sc) == 0:
                 continue
 
-            keep_idxes = torch.nonzero(cls_pi_sc > self.pi_thresh).view(-1)
+            keep_idxes = torch.nonzero(norm_pi_s > self.pi_thresh).view(-1)
             cls_boxes_sc = cls_boxes_sc[keep_idxes]
             cls_confs_sc = cls_confs_sc[keep_idxes]
 
@@ -50,7 +50,7 @@ def __filter_cls_boxes_s__(self, boxes_s, confs_s, pi_s):
                 cls_confs_sc = cls_confs_sc[keep_idxes].unsqueeze(dim=1)
 
             labels_css = torch.zeros(cls_confs_sc.shape).float().cuda()
-            labels_css += (c + 1)
+            labels_css += c
 
             cls_boxes_sl.append(cls_boxes_sc)
             cls_confs_sl.append(cls_confs_sc)
@@ -69,6 +69,7 @@ def __filter_cls_boxes_s__(self, boxes_s, confs_s, pi_s):
         return boxes_s, confs_s, labels_s
 
     def forward(self, mu, prob, pi):
+        # print('mu', torch.min(mu), torch.max(mu))
         boxes = mu.transpose(1, 2).clone()
         boxes[:, :, [0, 2]] = boxes[:, :, [0, 2]] * (self.input_size[1] / self.coord_range[1])
         boxes[:, :, [1, 3]] = boxes[:, :, [1, 3]] * (self.input_size[0] / self.coord_range[0])
@@ -81,5 +82,5 @@ def forward(self, mu, prob, pi):
             boxes_s, confs_s, labels_s = self.__filter_cls_boxes_s__(boxes_s, confs_s, pi[i, 0])
             boxes_l.append(boxes_s[:self.max_boxes])
             confs_l.append(confs_s[:self.max_boxes])
-            labels_l.append(labels_s[:self.max_boxes])
+            labels_l.append(labels_s[:self.max_boxes] + 1)
         return boxes_l, confs_l, labels_l
diff --git a/src/lib/pre_proc.py b/src/lib/pre_proc.py
@@ -90,6 +90,10 @@ def inv_transform_batch(self, data_dict):
         return d_dict
 
     def process(self, sample_dict):
+        sample_dict['img'] = np.array(sample_dict['img']).astype(np.float32)
+        sample_dict['boxes'] = np.array(sample_dict['boxes']).astype(np.float32)
+        sample_dict['labels'] = np.array(sample_dict['labels']).astype(np.float32)
+
         s_dict = self.__augment__(sample_dict)
         img_size = np.array(s_dict['img'].shape)[:2]
         s_dict['img'], s_dict['boxes'] = pre_util.resize(s_dict['img'], s_dict['boxes'], self.input_size)
@@ -104,9 +108,9 @@ def process(self, sample_dict):
 
 class PreProcAugm(PreProcBase):
     def __augment__(self, sample_dict):
-        img = np.array(sample_dict['img']).astype(np.float32)
-        boxes = np.array(sample_dict['boxes']).astype(np.float32)
-        labels = np.array(sample_dict['labels']).astype(np.float32)
+        img = np.array(sample_dict['img'])
+        boxes = np.array(sample_dict['boxes'])
+        labels = np.array(sample_dict['labels'])
 
         img = pre_util.rand_brightness(img)
         img = pre_util.rand_contrast(img)
diff --git a/src/lib/util.py b/src/lib/util.py
@@ -190,8 +190,8 @@ def sample_coords_from_mog(mu, sig, pi, n_samples):
 
 def create_coord_map(coord_map_size, coord_range):
     # gauss_w: 4 --> ((0, 1, 2, 3), ...)
-    x_map = np.array(list(range(coord_map_size[1])) * coord_map_size[0])
-    y_map = np.array(list(range(coord_map_size[0])) * coord_map_size[1])
+    x_map = np.array(list(range(coord_map_size[1])) * coord_map_size[0]).astype(np.float32)
+    y_map = np.array(list(range(coord_map_size[0])) * coord_map_size[1]).astype(np.float32)
 
     x_map = x_map.reshape((1, 1, coord_map_size[0], coord_map_size[1]))
     y_map = y_map.reshape((1, 1, coord_map_size[1], coord_map_size[0]))
diff --git a/train_mmod_res34_voc.sh b/train_mmod_res34_voc.sh
@@ -2,7 +2,7 @@
 
 BASH_FILE="./train_mmod_res34_voc.sh"
 RESULT_DIR="./result/voc/`(date "+%Y%m%d%H%M%S")`-320x320-mmod_res34"
-# LOAD_DIR="./result/voc/20191215020456-320x320-mmod_res34/snapshot/0040000"
+#LOAD_DIR="./result/voc/20191216203123-320x320-mmod_res34/snapshot/0100000"
 
 python3 ./src/run.py \
 --bash_file=$BASH_FILE \
@@ -45,7 +45,7 @@ python3 ./src/run.py \
     'dataset': 'voc',
     'dataset_args': {
         'roots': ['./data/voc-devkit-2007/VOC2007'],
-        'types': ['test', 'test'],
+        'types': ['test'],
         'pre_proc': 'base', 'pre_proc_args': {
             'max_boxes': 100,
             'rgb_mean': [0.485, 0.456, 0.406],
@@ -71,7 +71,7 @@ python3 ./src/run.py \
     'init_iter': 0, 'max_iter': 100000, 'max_grad': 7, 'print_intv': 100,
     'lr_decay_schd': {40000: 0.1, 70000: 0.1}
 }" \
---test_iters="[40000, 70000, 100000]" \
---snapshot_iters="[70000, 100000]" \
+--test_iters="[40000, 70000, 90000, 95000, 100000]" \
+--snapshot_iters="[40000, 70000, 100000]" \
 \
-# --load_dir=$LOAD_DIR
+#--load_dir=$LOAD_DIR