Skip to content

Commit 2918220

Browse files
[Feature] add setr cityscapes benchmark (open-mmlab#1087)
* [Feature] add setr cityscapes benchmark
* change pretrain
* Update configs/_base_/datasets/cityscapes_768x768.py
  Co-authored-by: Junjun2016 <[email protected]>
* remove redundant keys
* remove redundant keys
* fix lint error
* update readme
* update pretrain
Co-authored-by: Junjun2016 <[email protected]>
1 parent 1b41989 commit 2918220

9 files changed

+221
-6
lines changed
configs/_base_/datasets/cityscapes_768x768.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
_base_ = './cityscapes.py'
2+
img_norm_cfg = dict(
3+
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
4+
crop_size = (768, 768)
5+
train_pipeline = [
6+
dict(type='LoadImageFromFile'),
7+
dict(type='LoadAnnotations'),
8+
dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)),
9+
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
10+
dict(type='RandomFlip', prob=0.5),
11+
dict(type='PhotoMetricDistortion'),
12+
dict(type='Normalize', **img_norm_cfg),
13+
dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
14+
dict(type='DefaultFormatBundle'),
15+
dict(type='Collect', keys=['img', 'gt_semantic_seg']),
16+
]
17+
test_pipeline = [
18+
dict(type='LoadImageFromFile'),
19+
dict(
20+
type='MultiScaleFlipAug',
21+
img_scale=(2049, 1025),
22+
# img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
23+
flip=False,
24+
transforms=[
25+
dict(type='Resize', keep_ratio=True),
26+
dict(type='RandomFlip'),
27+
dict(type='Normalize', **img_norm_cfg),
28+
dict(type='ImageToTensor', keys=['img']),
29+
dict(type='Collect', keys=['img']),
30+
])
31+
]
32+
data = dict(
33+
train=dict(pipeline=train_pipeline),
34+
val=dict(pipeline=test_pipeline),
35+
test=dict(pipeline=test_pipeline))

configs/setr/README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,11 @@ This head has two version head.
4545
| SETR-PUP | ViT-L | 512x512 | 16 | 160000 | 19.54 | 4.50 | 48.24 | 49.99 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/setr/setr_pup_512x512_160k_b16_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_512x512_160k_b16_ade20k/setr_pup_512x512_160k_b16_ade20k_20210619_191343-7e0ce826.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_512x512_160k_b16_ade20k/setr_pup_512x512_160k_b16_ade20k_20210619_191343.log.json) |
4646
| SETR-MLA | ViT-L | 512x512 | 8 | 160000 | 10.96 | - | 47.34 | 49.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/setr/setr_mla_512x512_160k_b8_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b8_ade20k/setr_mla_512x512_160k_b8_ade20k_20210619_191118-c6d21df0.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b8_ade20k/setr_mla_512x512_160k_b8_ade20k_20210619_191118.log.json) |
4747
| SETR-MLA | ViT-L | 512x512 | 16 | 160000 | 17.30 | 5.25 | 47.54 | 49.37 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/setr/setr_mla_512x512_160k_b16_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b16_ade20k/setr_mla_512x512_160k_b16_ade20k_20210619_191057-f9741de7.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b16_ade20k/setr_mla_512x512_160k_b16_ade20k_20210619_191057.log.json) |
48+
49+
### Cityscapes
50+
51+
| Method | Backbone | Crop Size | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download |
52+
| ------ | -------- | --------- | ---------- | ------- | -------- | -------------- | ----- | ------------: | ------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
53+
| SETR-Naive | ViT-L | 768x768 | 8 | 80000 | 24.06 | 0.39 | 78.10 | 80.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/setr/setr_vit-large_naive_8x1_768x768_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_vit-large_8x1_768x768_80k_cityscapes/setr_naive_vit-large_8x1_768x768_80k_cityscapes_20211123_000505-20728e80.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_vit-large_8x1_768x768_80k_cityscapes/setr_naive_vit-large_8x1_768x768_80k_cityscapes_20211123_000505.log.json) |
54+
| SETR-PUP | ViT-L | 768x768 | 8 | 80000 | 27.96 | 0.37 | 79.21 | 81.02 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/setr/setr_vit-large_pup_8x1_768x768_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_vit-large_8x1_768x768_80k_cityscapes/setr_pup_vit-large_8x1_768x768_80k_cityscapes_20211122_155115-f6f37b8f.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_vit-large_8x1_768x768_80k_cityscapes/setr_pup_vit-large_8x1_768x768_80k_cityscapes_20211122_155115.log.json) |
55+
| SETR-MLA | ViT-L | 768x768 | 8 | 80000 | 24.10 | 0.41 | 77.00 | 79.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/setr/setr_vit-large_mla_8x1_768x768_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_vit-large_8x1_768x768_80k_cityscapes/setr_mla_vit-large_8x1_768x768_80k_cityscapes_20211119_101003-7f8dccbe.pth) &#124; [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_vit-large_8x1_768x768_80k_cityscapes/setr_mla_vit-large_8x1_768x768_80k_cityscapes_20211119_101003.log.json) |

configs/setr/setr.yml

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ Collections:
33
Metadata:
44
Training Data:
55
- ADE20K
6+
- Cityscapes
67
Paper:
78
URL: https://arxiv.org/abs/2012.15840
89
Title: Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective
@@ -95,3 +96,69 @@ Models:
9596
mIoU(ms+flip): 49.37
9697
Config: configs/setr/setr_mla_512x512_160k_b16_ade20k.py
9798
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b16_ade20k/setr_mla_512x512_160k_b16_ade20k_20210619_191057-f9741de7.pth
99+
- Name: setr_vit-large_naive_8x1_768x768_80k_cityscapes
100+
In Collection: setr
101+
Metadata:
102+
backbone: ViT-L
103+
crop size: (768,768)
104+
lr schd: 80000
105+
inference time (ms/im):
106+
- value: 2564.1
107+
hardware: V100
108+
backend: PyTorch
109+
batch size: 1
110+
mode: FP32
111+
resolution: (768,768)
112+
Training Memory (GB): 24.06
113+
Results:
114+
- Task: Semantic Segmentation
115+
Dataset: Cityscapes
116+
Metrics:
117+
mIoU: 78.1
118+
mIoU(ms+flip): 80.22
119+
Config: configs/setr/setr_vit-large_naive_8x1_768x768_80k_cityscapes.py
120+
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_vit-large_8x1_768x768_80k_cityscapes/setr_naive_vit-large_8x1_768x768_80k_cityscapes_20211123_000505-20728e80.pth
121+
- Name: setr_vit-large_pup_8x1_768x768_80k_cityscapes
122+
In Collection: setr
123+
Metadata:
124+
backbone: ViT-L
125+
crop size: (768,768)
126+
lr schd: 80000
127+
inference time (ms/im):
128+
- value: 2702.7
129+
hardware: V100
130+
backend: PyTorch
131+
batch size: 1
132+
mode: FP32
133+
resolution: (768,768)
134+
Training Memory (GB): 27.96
135+
Results:
136+
- Task: Semantic Segmentation
137+
Dataset: Cityscapes
138+
Metrics:
139+
mIoU: 79.21
140+
mIoU(ms+flip): 81.02
141+
Config: configs/setr/setr_vit-large_pup_8x1_768x768_80k_cityscapes.py
142+
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_vit-large_8x1_768x768_80k_cityscapes/setr_pup_vit-large_8x1_768x768_80k_cityscapes_20211122_155115-f6f37b8f.pth
143+
- Name: setr_vit-large_mla_8x1_768x768_80k_cityscapes
144+
In Collection: setr
145+
Metadata:
146+
backbone: ViT-L
147+
crop size: (768,768)
148+
lr schd: 80000
149+
inference time (ms/im):
150+
- value: 2439.02
151+
hardware: V100
152+
backend: PyTorch
153+
batch size: 1
154+
mode: FP32
155+
resolution: (768,768)
156+
Training Memory (GB): 24.1
157+
Results:
158+
- Task: Semantic Segmentation
159+
Dataset: Cityscapes
160+
Metrics:
161+
mIoU: 77.0
162+
mIoU(ms+flip): 79.59
163+
Config: configs/setr/setr_vit-large_mla_8x1_768x768_80k_cityscapes.py
164+
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_vit-large_8x1_768x768_80k_cityscapes/setr_mla_vit-large_8x1_768x768_80k_cityscapes_20211119_101003-7f8dccbe.pth

configs/setr/setr_mla_512x512_160k_b8_ade20k.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,11 @@
44
]
55
norm_cfg = dict(type='SyncBN', requires_grad=True)
66
model = dict(
7-
pretrained='pretrain/vit_large_patch16_384.pth',
8-
backbone=dict(img_size=(512, 512), drop_rate=0.),
7+
pretrained=None,
8+
backbone=dict(
9+
img_size=(512, 512),
10+
drop_rate=0.,
11+
init_cfg=dict(type='Pretrained', checkpoint='mmcls://vit_large_p16')),
912
decode_head=dict(num_classes=150),
1013
auxiliary_head=[
1114
dict(

configs/setr/setr_naive_512x512_160k_b16_ade20k.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,11 @@
44
]
55
norm_cfg = dict(type='SyncBN', requires_grad=True)
66
model = dict(
7-
pretrained='pretrain/vit_large_patch16_384.pth',
8-
backbone=dict(img_size=(512, 512), drop_rate=0.),
7+
pretrained=None,
8+
backbone=dict(
9+
img_size=(512, 512),
10+
drop_rate=0.,
11+
init_cfg=dict(type='Pretrained', checkpoint='mmcls://vit_large_p16')),
912
decode_head=dict(num_classes=150),
1013
auxiliary_head=[
1114
dict(

configs/setr/setr_pup_512x512_160k_b16_ade20k.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,11 @@
44
]
55
norm_cfg = dict(type='SyncBN', requires_grad=True)
66
model = dict(
7-
pretrained='pretrain/vit_large_patch16_384.pth',
8-
backbone=dict(img_size=(512, 512), drop_rate=0.),
7+
pretrained=None,
8+
backbone=dict(
9+
img_size=(512, 512),
10+
drop_rate=0.,
11+
init_cfg=dict(type='Pretrained', checkpoint='mmcls://vit_large_p16')),
912
decode_head=dict(num_classes=150),
1013
auxiliary_head=[
1114
dict(

configs/setr/setr_vit-large_mla_8x1_768x768_80k_cityscapes.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
_base_ = [
2+
'../_base_/models/setr_mla.py', '../_base_/datasets/cityscapes_768x768.py',
3+
'../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
4+
]
5+
model = dict(
6+
pretrained=None,
7+
backbone=dict(
8+
drop_rate=0,
9+
init_cfg=dict(type='Pretrained', checkpoint='mmcls://vit_large_p16')),
10+
test_cfg=dict(mode='slide', crop_size=(768, 768), stride=(512, 512)))
11+
12+
optimizer = dict(
13+
lr=0.002,
14+
weight_decay=0.0,
15+
paramwise_cfg=dict(custom_keys={'head': dict(lr_mult=10.)}))
16+
data = dict(samples_per_gpu=1)

configs/setr/setr_vit-large_naive_8x1_768x768_80k_cityscapes.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
_base_ = [
2+
'../_base_/models/setr_naive.py',
3+
'../_base_/datasets/cityscapes_768x768.py', '../_base_/default_runtime.py',
4+
'../_base_/schedules/schedule_80k.py'
5+
]
6+
model = dict(
7+
pretrained=None,
8+
backbone=dict(
9+
drop_rate=0.,
10+
init_cfg=dict(type='Pretrained', checkpoint='mmcls://vit_large_p16')),
11+
test_cfg=dict(mode='slide', crop_size=(768, 768), stride=(512, 512)))
12+
13+
optimizer = dict(
14+
weight_decay=0.0,
15+
paramwise_cfg=dict(custom_keys={'head': dict(lr_mult=10.)}))
16+
17+
data = dict(samples_per_gpu=1)

configs/setr/setr_vit-large_pup_8x1_768x768_80k_cityscapes.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
_base_ = [
2+
'../_base_/models/setr_pup.py', '../_base_/datasets/cityscapes_768x768.py',
3+
'../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
4+
]
5+
6+
norm_cfg = dict(type='SyncBN', requires_grad=True)
7+
crop_size = (768, 768)
8+
model = dict(
9+
pretrained=None,
10+
backbone=dict(
11+
drop_rate=0.,
12+
init_cfg=dict(type='Pretrained', checkpoint='mmcls://vit_large_p16')),
13+
auxiliary_head=[
14+
dict(
15+
type='SETRUPHead',
16+
in_channels=1024,
17+
channels=256,
18+
in_index=0,
19+
num_classes=19,
20+
dropout_ratio=0,
21+
norm_cfg=norm_cfg,
22+
num_convs=2,
23+
up_scale=4,
24+
kernel_size=3,
25+
align_corners=False,
26+
loss_decode=dict(
27+
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
28+
dict(
29+
type='SETRUPHead',
30+
in_channels=1024,
31+
channels=256,
32+
in_index=1,
33+
num_classes=19,
34+
dropout_ratio=0,
35+
norm_cfg=norm_cfg,
36+
num_convs=2,
37+
up_scale=4,
38+
kernel_size=3,
39+
align_corners=False,
40+
loss_decode=dict(
41+
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
42+
dict(
43+
type='SETRUPHead',
44+
in_channels=1024,
45+
channels=256,
46+
in_index=2,
47+
num_classes=19,
48+
dropout_ratio=0,
49+
norm_cfg=norm_cfg,
50+
num_convs=2,
51+
up_scale=4,
52+
kernel_size=3,
53+
align_corners=False,
54+
loss_decode=dict(
55+
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4))
56+
],
57+
test_cfg=dict(mode='slide', crop_size=crop_size, stride=(512, 512)))
58+
59+
optimizer = dict(
60+
weight_decay=0.0,
61+
paramwise_cfg=dict(custom_keys={'head': dict(lr_mult=10.)}))
62+
63+
data = dict(samples_per_gpu=1)

0 commit comments

Comments
 (0)