Skip to content

Commit 94e12e8

Browse files
authored
Support DMNet (#313)
* Support DMNet * fix doc and delete norm_name
1 parent feefc6a commit 94e12e8

19 files changed

+337
-5
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ Supported methods:
7575
- [x] [DANet](configs/danet)
7676
- [x] [APCNet](configs/apcnet)
7777
- [x] [GCNet](configs/gcnet)
78+
- [x] [DMNet](configs/dmnet)
7879
- [x] [ANN](configs/ann)
7980
- [x] [OCRNet](configs/ocrnet)
8081
- [x] [Fast-SCNN](configs/fastscnn)

configs/_base_/models/dmnet_r50-d8.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Base model settings for DMNet with a ResNet-50 (v1c) backbone.
#
# ``norm_cfg`` is the single source of truth for the normalization layer:
# the backbone, the DMHead decode head and the FCN auxiliary head all refer
# to it, so changing it here (e.g. to a different norm type) affects every
# part of the model consistently.
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    # Main head: consumes the last backbone output (in_index=3).
    decode_head=dict(
        type='DMHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        filter_sizes=(1, 3, 5, 7),
        dropout_ratio=0.1,
        num_classes=19,
        # Reuse the shared setting instead of repeating the literal dict.
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    # Auxiliary head: consumes the third backbone output (in_index=2) and
    # contributes a loss weighted by 0.4.
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)))
# model training and testing settings
train_cfg = dict()
test_cfg = dict(mode='whole')

configs/dmnet/README.md

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Dynamic Multi-scale Filters for Semantic Segmentation
2+
3+
## Introduction
4+
5+
```latex
6+
@InProceedings{He_2019_ICCV,
7+
author = {He, Junjun and Deng, Zhongying and Qiao, Yu},
8+
title = {Dynamic Multi-Scale Filters for Semantic Segmentation},
9+
booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
10+
month = {October},
11+
year = {2019}
12+
}
13+
```
14+
15+
## Results and models
16+
17+
### Cityscapes
18+
19+
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
20+
|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
21+
| DMNet | R-50-D8 | 512x1024 | 40000 | 7.0 | 3.66 | 77.78 | 79.14 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes/dmnet_r50-d8_512x1024_40k_cityscapes_20201214_115717-5e88fa33.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes/dmnet_r50-d8_512x1024_40k_cityscapes-20201214_115717.log.json) |
22+
| DMNet | R-101-D8 | 512x1024 | 40000 | 10.6 | 2.54 | 78.37 | 79.72 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes/dmnet_r101-d8_512x1024_40k_cityscapes_20201214_115716-abc9d111.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes/dmnet_r101-d8_512x1024_40k_cityscapes-20201214_115716.log.json) |
23+
| DMNet | R-50-D8 | 769x769 | 40000 | 7.9 | 1.57 | 78.49 | 80.27 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_40k_cityscapes/dmnet_r50-d8_769x769_40k_cityscapes_20201214_115717-2a2628d7.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_40k_cityscapes/dmnet_r50-d8_769x769_40k_cityscapes-20201214_115717.log.json) |
24+
| DMNet | R-101-D8 | 769x769 | 40000 | 12.0 | 1.01 | 77.62 | 78.94 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_40k_cityscapes/dmnet_r101-d8_769x769_40k_cityscapes_20201214_115718-b650de90.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_40k_cityscapes/dmnet_r101-d8_769x769_40k_cityscapes-20201214_115718.log.json) |
25+
| DMNet | R-50-D8 | 512x1024 | 80000 | - | - | 79.07 | 80.22 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes/dmnet_r50-d8_512x1024_80k_cityscapes_20201214_115716-987f51e3.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes/dmnet_r50-d8_512x1024_80k_cityscapes-20201214_115716.log.json) |
26+
| DMNet | R-101-D8 | 512x1024 | 80000 | - | - | 79.64 | 80.67 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes/dmnet_r101-d8_512x1024_80k_cityscapes_20201214_115705-b1ff208a.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes/dmnet_r101-d8_512x1024_80k_cityscapes-20201214_115705.log.json) |
27+
| DMNet | R-50-D8 | 769x769 | 80000 | - | - | 79.22 | 80.55 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_80k_cityscapes/dmnet_r50-d8_769x769_80k_cityscapes_20201214_115718-7ea9fa12.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_80k_cityscapes/dmnet_r50-d8_769x769_80k_cityscapes-20201214_115718.log.json) |
28+
| DMNet | R-101-D8 | 769x769 | 80000 | - | - | 79.19 | 80.65 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_80k_cityscapes/dmnet_r101-d8_769x769_80k_cityscapes_20201214_115716-a7fbc2ab.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_80k_cityscapes/dmnet_r101-d8_769x769_80k_cityscapes-20201214_115716.log.json) |
29+
30+
### ADE20K
31+
32+
| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download |
33+
|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
34+
| DMNet | R-50-D8 | 512x512 | 80000 | 9.4 | 20.95 | 42.37 | 43.62 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_80k_ade20k/dmnet_r50-d8_512x512_80k_ade20k_20201214_115705-a8626293.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_80k_ade20k/dmnet_r50-d8_512x512_80k_ade20k-20201214_115705.log.json) |
35+
| DMNet | R-101-D8 | 512x512 | 80000 | 13.0 | 13.88 | 45.34 | 46.13 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_80k_ade20k/dmnet_r101-d8_512x512_80k_ade20k_20201214_115704-c656c3fb.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_80k_ade20k/dmnet_r101-d8_512x512_80k_ade20k-20201214_115704.log.json) |
36+
| DMNet | R-50-D8 | 512x512 | 160000 | - | - | 43.15 | 44.17 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_160k_ade20k/dmnet_r50-d8_512x512_160k_ade20k_20201214_115706-25fb92c2.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_160k_ade20k/dmnet_r50-d8_512x512_160k_ade20k-20201214_115706.log.json) |
37+
| DMNet | R-101-D8 | 512x512 | 160000 | - | - | 45.42 | 46.76 | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_160k_ade20k/dmnet_r101-d8_512x512_160k_ade20k_20201214_115705-73f9a8d7.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_160k_ade20k/dmnet_r101-d8_512x512_160k_ade20k-20201214_115705.log.json) |
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Inherit the R-50 Cityscapes 512x1024 40k config and override only the
# pretrained checkpoint and the backbone depth.
_base_ = './dmnet_r50-d8_512x1024_40k_cityscapes.py'
model = dict(
    pretrained='open-mmlab://resnet101_v1c',
    backbone=dict(depth=101),
)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Inherit the R-50 Cityscapes 512x1024 80k config and override only the
# pretrained checkpoint and the backbone depth.
_base_ = './dmnet_r50-d8_512x1024_80k_cityscapes.py'
model = dict(
    pretrained='open-mmlab://resnet101_v1c',
    backbone=dict(depth=101),
)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Inherit the R-50 ADE20K 512x512 160k config and override only the
# pretrained checkpoint and the backbone depth.
_base_ = './dmnet_r50-d8_512x512_160k_ade20k.py'
model = dict(
    pretrained='open-mmlab://resnet101_v1c',
    backbone=dict(depth=101),
)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Inherit the R-50 ADE20K 512x512 80k config and override only the
# pretrained checkpoint and the backbone depth.
_base_ = './dmnet_r50-d8_512x512_80k_ade20k.py'
model = dict(
    pretrained='open-mmlab://resnet101_v1c',
    backbone=dict(depth=101),
)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Inherit the R-50 Cityscapes 769x769 40k config and override only the
# pretrained checkpoint and the backbone depth.
_base_ = './dmnet_r50-d8_769x769_40k_cityscapes.py'
model = dict(
    pretrained='open-mmlab://resnet101_v1c',
    backbone=dict(depth=101),
)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Inherit the R-50 Cityscapes 769x769 80k config and override only the
# pretrained checkpoint and the backbone depth.
_base_ = './dmnet_r50-d8_769x769_80k_cityscapes.py'
model = dict(
    pretrained='open-mmlab://resnet101_v1c',
    backbone=dict(depth=101),
)
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# DMNet R-50-D8 on Cityscapes with the 40k-iteration schedule; everything is
# inherited from the listed base configs, in this order.
_base_ = [
    '../_base_/models/dmnet_r50-d8.py',
    '../_base_/datasets/cityscapes.py',
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_40k.py',
]
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# DMNet R-50-D8 on Cityscapes with the 80k-iteration schedule; everything is
# inherited from the listed base configs, in this order.
_base_ = [
    '../_base_/models/dmnet_r50-d8.py',
    '../_base_/datasets/cityscapes.py',
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_80k.py',
]
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# DMNet R-50-D8 on ADE20K with the 160k-iteration schedule.
_base_ = [
    '../_base_/models/dmnet_r50-d8.py',
    '../_base_/datasets/ade20k.py',
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_160k.py',
]
# Both heads predict 150 classes instead of the base model's value.
model = dict(
    decode_head=dict(num_classes=150),
    auxiliary_head=dict(num_classes=150),
)
test_cfg = dict(mode='whole')
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# DMNet R-50-D8 on ADE20K with the 80k-iteration schedule.
_base_ = [
    '../_base_/models/dmnet_r50-d8.py',
    '../_base_/datasets/ade20k.py',
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_80k.py',
]
# Both heads predict 150 classes instead of the base model's value.
model = dict(
    decode_head=dict(num_classes=150),
    auxiliary_head=dict(num_classes=150),
)
test_cfg = dict(mode='whole')
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# DMNet R-50-D8 on Cityscapes (769x769 dataset config), 40k-iteration
# schedule.
_base_ = [
    '../_base_/models/dmnet_r50-d8.py',
    '../_base_/datasets/cityscapes_769x769.py',
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_40k.py',
]
# Switch both heads to align_corners=True.
model = dict(
    decode_head=dict(align_corners=True),
    auxiliary_head=dict(align_corners=True),
)
# Test in 'slide' mode with 769x769 crops and a stride of 513.
test_cfg = dict(mode='slide', crop_size=(769, 769), stride=(513, 513))
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# DMNet R-50-D8 on Cityscapes (769x769 dataset config), 80k-iteration
# schedule.
_base_ = [
    '../_base_/models/dmnet_r50-d8.py',
    '../_base_/datasets/cityscapes_769x769.py',
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_80k.py',
]
# Switch both heads to align_corners=True.
model = dict(
    decode_head=dict(align_corners=True),
    auxiliary_head=dict(align_corners=True),
)
# Test in 'slide' mode with 769x769 crops and a stride of 513.
test_cfg = dict(mode='slide', crop_size=(769, 769), stride=(513, 513))

docs/model_zoo.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,10 @@ Please refer to [HRNet](https://github.com/open-mmlab/mmsegmentation/blob/master
7979

8080
Please refer to [GCNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet) for details.
8181

82+
### DMNet
83+
84+
Please refer to [DMNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet) for details.
85+
8286
### ANN
8387

8488
Please refer to [ANN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann) for details.

mmseg/models/decode_heads/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from .aspp_head import ASPPHead
44
from .cc_head import CCHead
55
from .da_head import DAHead
6+
from .dm_head import DMHead
67
from .dnl_head import DNLHead
78
from .ema_head import EMAHead
89
from .enc_head import EncHead
@@ -22,5 +23,5 @@
2223
'FCNHead', 'PSPHead', 'ASPPHead', 'PSAHead', 'NLHead', 'GCHead', 'CCHead',
2324
'UPerHead', 'DepthwiseSeparableASPPHead', 'ANNHead', 'DAHead', 'OCRHead',
2425
'EncHead', 'DepthwiseSeparableFCNHead', 'FPNHead', 'EMAHead', 'DNLHead',
25-
'PointHead', 'APCHead'
26+
'PointHead', 'APCHead', 'DMHead'
2627
]

mmseg/models/decode_heads/dm_head.py

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
import torch
2+
import torch.nn as nn
3+
import torch.nn.functional as F
4+
from mmcv.cnn import ConvModule, build_activation_layer, build_norm_layer
5+
6+
from ..builder import HEADS
7+
from .decode_head import BaseDecodeHead
8+
9+
10+
class DCM(nn.Module):
    """Dynamic Convolutional Module (DCM) used in DMNet.

    A ``filter_size x filter_size`` kernel is generated from the adaptively
    average-pooled input and applied, one kernel per (sample, channel) pair,
    to a channel-reduced copy of the same input.

    Args:
        filter_size (int): Spatial size of the generated convolution kernel.
        fusion (bool): Whether to append one 1x1 conv that fuses the DCM
            output feature.
        in_channels (int): Input channels.
        channels (int): Channels after modules, before conv_seg.
        conv_cfg (dict | None): Config of conv layers.
        norm_cfg (dict | None): Config of norm layers.
        act_cfg (dict): Config of activation layers.
    """

    def __init__(self, filter_size, fusion, in_channels, channels, conv_cfg,
                 norm_cfg, act_cfg):
        super(DCM, self).__init__()
        self.filter_size = filter_size
        self.fusion = fusion
        self.in_channels = in_channels
        self.channels = channels
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg

        # Settings shared by every ConvModule built in this module.
        conv_kwargs = dict(
            conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)

        # Plain 1x1 conv (no norm / activation) producing the dynamic kernel.
        self.filter_gen_conv = nn.Conv2d(
            in_channels, channels, kernel_size=1, stride=1, padding=0)

        # 1x1 conv reducing the input to ``channels`` feature maps.
        self.input_redu_conv = ConvModule(
            in_channels, channels, 1, **conv_kwargs)

        # Norm and activation applied after the dynamic convolution.
        self.norm = (
            build_norm_layer(norm_cfg, channels)[1]
            if norm_cfg is not None else None)
        self.activate = build_activation_layer(act_cfg)

        if fusion:
            self.fusion_conv = ConvModule(
                channels, channels, 1, **conv_kwargs)

    def forward(self, x):
        """Forward function."""
        k = self.filter_size
        # The kernel comes from the raw input, pooled down to k x k, so it
        # must be computed before ``x`` is replaced by its reduced version.
        dyn_kernel = self.filter_gen_conv(F.adaptive_avg_pool2d(x, k))
        x = self.input_redu_conv(x)

        b, c, h, w = x.shape  # c == self.channels
        groups = b * c
        # Fold the batch into the channel axis so that a single grouped conv
        # applies a distinct kernel to every (sample, channel) pair.
        x = x.view(1, groups, h, w)  # [1, b * c, h, w]
        dyn_kernel = dyn_kernel.view(groups, 1, k, k)  # [b * c, 1, k, k]

        # Zero-pad so the output keeps the h x w size. Odd k pads evenly;
        # even k needs one extra row / column on the left and top.
        pad = (k - 1) // 2
        extra = (k - 1) % 2
        x = F.pad(
            input=x,
            pad=(pad + extra, pad, pad + extra, pad),
            mode='constant',
            value=0)

        output = F.conv2d(input=x, weight=dyn_kernel, groups=groups)
        output = output.view(b, c, h, w)  # back to [b, c, h, w]

        if self.norm is not None:
            output = self.norm(output)
        output = self.activate(output)

        if self.fusion:
            output = self.fusion_conv(output)
        return output
89+
90+
91+
@HEADS.register_module()
class DMHead(BaseDecodeHead):
    """Dynamic Multi-scale Filters for Semantic Segmentation.

    This head is the implementation of DMNet:
    https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf

    One DCM branch is built per entry of ``filter_sizes``. The branch outputs
    are concatenated with the input feature and fused by a 3x3 bottleneck
    conv before classification.

    Args:
        filter_sizes (tuple[int]): The size of generated convolutional filters
            used in Dynamic Convolutional Module. Default: (1, 3, 5, 7).
        fusion (bool): Add one conv to fuse DCM output feature.
    """

    def __init__(self, filter_sizes=(1, 3, 5, 7), fusion=False, **kwargs):
        super(DMHead, self).__init__(**kwargs)
        assert isinstance(filter_sizes, (list, tuple))
        self.filter_sizes = filter_sizes
        self.fusion = fusion

        # One dynamic-convolution branch per requested kernel size.
        self.dcm_modules = nn.ModuleList([
            DCM(size,
                self.fusion,
                self.in_channels,
                self.channels,
                conv_cfg=self.conv_cfg,
                norm_cfg=self.norm_cfg,
                act_cfg=self.act_cfg) for size in self.filter_sizes
        ])

        # The bottleneck sees the input feature plus every branch output.
        fused_channels = (
            self.in_channels + len(filter_sizes) * self.channels)
        self.bottleneck = ConvModule(
            fused_channels,
            self.channels,
            3,
            padding=1,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg)

    def forward(self, inputs):
        """Forward function."""
        feat = self._transform_inputs(inputs)
        # Input feature first, followed by the branches in module order.
        branches = [feat] + [dcm(feat) for dcm in self.dcm_modules]
        fused = self.bottleneck(torch.cat(branches, dim=1))
        return self.cls_seg(fused)

0 commit comments

Comments
 (0)