Skip to content

Commit ea3472a

Browse files
committed
update README.md
1 parent 6eb6061 commit ea3472a

File tree

7 files changed

+45
-21
lines changed

7 files changed

+45
-21
lines changed

README.md

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,21 @@
1-
# mixture-model-based-object-detection
2-
mixture-model-based-object-detection
1+
Mixture-Model-based Object Detector: PyTorch Implementation
2+
===========================================================
33

4-
Environments:
5-
- python3.6
6-
- pytorch1.1
7-
- torchvision0.3
4+
This repository is the PyTorch implementation of "Mixture-Model-based Bounding Box Density Estimation for Object Detection".
5+
6+
Environment
7+
----------
8+
* python3.6
9+
* pytorch1.1
10+
* torchvision0.3
11+
12+
Citation
13+
--------
14+
```
15+
@article{yoo2019mmod,
16+
title={Mixture-Model-based Bounding Box Density Estimation for Object Detection},
17+
author={Yoo, Jaeyoung and Seo, Geonseok and Kwak, Nojun},
18+
journal={arXiv preprint arXiv:1911.12721},
19+
year={2019}
20+
}
21+
```

src/lib/backbone.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,12 @@ def build(self):
7676
self.net['stage_p3_1'], self.net['stage_p3_2']])
7777

7878
def forward(self, image):
79+
# import torch
80+
# print('image_r', torch.min(image[0, 0]), torch.mean(image[0, 0]), torch.max(image[0, 0]))
81+
# print('image_g', torch.min(image[0, 1]), torch.mean(image[0, 1]), torch.max(image[0, 1]))
82+
# print('image_b', torch.min(image[0, 2]), torch.mean(image[0, 2]), torch.max(image[0, 2]))
7983
base_fmap = self.net['base'].forward(image)
84+
# print('base_fmap', torch.min(base_fmap), torch.max(base_fmap))
8085
fmap_c3 = self.net['stage_c3'].forward(base_fmap)
8186
fmap_c4 = self.net['stage_c4'].forward(fmap_c3)
8287
fmap_c5 = self.net['stage_c5'].forward(fmap_c4)

src/lib/network_util.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def create_def_coord(batch_size, output_sizes, coord_range):
5959

6060

6161
def create_box_coord_map(output_size, output_ch, coord_range):
62-
box_coord_map = np.zeros((output_ch, 4, output_size[0], output_size[1]))
62+
box_coord_map = np.zeros((output_ch, 4, output_size[0], output_size[1])).astype(np.float32)
6363
box_coord_map[:, :2] += lib_util.create_coord_map(output_size, coord_range)
6464

6565
# gauss_ch: 4 --> ((0, 1, 2, 3), ...)

src/lib/post_proc.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,12 @@ def __filter_cls_boxes_s__(self, boxes_s, confs_s, pi_s):
2323
for c in range(self.n_classes - 1):
2424
cls_boxes_sc = boxes_s[c]
2525
cls_confs_sc = confs_s[c]
26-
cls_pi_sc = norm_pi_s.clone()
26+
# cls_pi_sc = norm_pi_s.clone()
2727

2828
if len(cls_boxes_sc) == 0:
2929
continue
3030

31-
keep_idxes = torch.nonzero(cls_pi_sc > self.pi_thresh).view(-1)
31+
keep_idxes = torch.nonzero(norm_pi_s > self.pi_thresh).view(-1)
3232
cls_boxes_sc = cls_boxes_sc[keep_idxes]
3333
cls_confs_sc = cls_confs_sc[keep_idxes]
3434

@@ -50,7 +50,7 @@ def __filter_cls_boxes_s__(self, boxes_s, confs_s, pi_s):
5050
cls_confs_sc = cls_confs_sc[keep_idxes].unsqueeze(dim=1)
5151

5252
labels_css = torch.zeros(cls_confs_sc.shape).float().cuda()
53-
labels_css += (c + 1)
53+
labels_css += c
5454

5555
cls_boxes_sl.append(cls_boxes_sc)
5656
cls_confs_sl.append(cls_confs_sc)
@@ -69,6 +69,7 @@ def __filter_cls_boxes_s__(self, boxes_s, confs_s, pi_s):
6969
return boxes_s, confs_s, labels_s
7070

7171
def forward(self, mu, prob, pi):
72+
# print('mu', torch.min(mu), torch.max(mu))
7273
boxes = mu.transpose(1, 2).clone()
7374
boxes[:, :, [0, 2]] = boxes[:, :, [0, 2]] * (self.input_size[1] / self.coord_range[1])
7475
boxes[:, :, [1, 3]] = boxes[:, :, [1, 3]] * (self.input_size[0] / self.coord_range[0])
@@ -81,5 +82,5 @@ def forward(self, mu, prob, pi):
8182
boxes_s, confs_s, labels_s = self.__filter_cls_boxes_s__(boxes_s, confs_s, pi[i, 0])
8283
boxes_l.append(boxes_s[:self.max_boxes])
8384
confs_l.append(confs_s[:self.max_boxes])
84-
labels_l.append(labels_s[:self.max_boxes])
85+
labels_l.append(labels_s[:self.max_boxes] + 1)
8586
return boxes_l, confs_l, labels_l

src/lib/pre_proc.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,10 @@ def inv_transform_batch(self, data_dict):
9090
return d_dict
9191

9292
def process(self, sample_dict):
93+
sample_dict['img'] = np.array(sample_dict['img']).astype(np.float32)
94+
sample_dict['boxes'] = np.array(sample_dict['boxes']).astype(np.float32)
95+
sample_dict['labels'] = np.array(sample_dict['labels']).astype(np.float32)
96+
9397
s_dict = self.__augment__(sample_dict)
9498
img_size = np.array(s_dict['img'].shape)[:2]
9599
s_dict['img'], s_dict['boxes'] = pre_util.resize(s_dict['img'], s_dict['boxes'], self.input_size)
@@ -104,9 +108,9 @@ def process(self, sample_dict):
104108

105109
class PreProcAugm(PreProcBase):
106110
def __augment__(self, sample_dict):
107-
img = np.array(sample_dict['img']).astype(np.float32)
108-
boxes = np.array(sample_dict['boxes']).astype(np.float32)
109-
labels = np.array(sample_dict['labels']).astype(np.float32)
111+
img = np.array(sample_dict['img'])
112+
boxes = np.array(sample_dict['boxes'])
113+
labels = np.array(sample_dict['labels'])
110114

111115
img = pre_util.rand_brightness(img)
112116
img = pre_util.rand_contrast(img)

src/lib/util.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -190,8 +190,8 @@ def sample_coords_from_mog(mu, sig, pi, n_samples):
190190

191191
def create_coord_map(coord_map_size, coord_range):
192192
# gauss_w: 4 --> ((0, 1, 2, 3), ...)
193-
x_map = np.array(list(range(coord_map_size[1])) * coord_map_size[0])
194-
y_map = np.array(list(range(coord_map_size[0])) * coord_map_size[1])
193+
x_map = np.array(list(range(coord_map_size[1])) * coord_map_size[0]).astype(np.float32)
194+
y_map = np.array(list(range(coord_map_size[0])) * coord_map_size[1]).astype(np.float32)
195195

196196
x_map = x_map.reshape((1, 1, coord_map_size[0], coord_map_size[1]))
197197
y_map = y_map.reshape((1, 1, coord_map_size[1], coord_map_size[0]))

train_mmod_res34_voc.sh

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
BASH_FILE="./train_mmod_res34_voc.sh"
44
RESULT_DIR="./result/voc/`(date "+%Y%m%d%H%M%S")`-320x320-mmod_res34"
5-
# LOAD_DIR="./result/voc/20191215020456-320x320-mmod_res34/snapshot/0040000"
5+
#LOAD_DIR="./result/voc/20191216203123-320x320-mmod_res34/snapshot/0100000"
66

77
python3 ./src/run.py \
88
--bash_file=$BASH_FILE \
@@ -45,7 +45,7 @@ python3 ./src/run.py \
4545
'dataset': 'voc',
4646
'dataset_args': {
4747
'roots': ['./data/voc-devkit-2007/VOC2007'],
48-
'types': ['test', 'test'],
48+
'types': ['test'],
4949
'pre_proc': 'base', 'pre_proc_args': {
5050
'max_boxes': 100,
5151
'rgb_mean': [0.485, 0.456, 0.406],
@@ -71,7 +71,7 @@ python3 ./src/run.py \
7171
'init_iter': 0, 'max_iter': 100000, 'max_grad': 7, 'print_intv': 100,
7272
'lr_decay_schd': {40000: 0.1, 70000: 0.1}
7373
}" \
74-
--test_iters="[40000, 70000, 100000]" \
75-
--snapshot_iters="[70000, 100000]" \
74+
--test_iters="[40000, 70000, 90000, 95000, 100000]" \
75+
--snapshot_iters="[40000, 70000, 100000]" \
7676
\
77-
# --load_dir=$LOAD_DIR
77+
#--load_dir=$LOAD_DIR

0 commit comments

Comments
 (0)