Skip to content

Commit 332302f

Browse files
committed
init
1 parent cba35f1 commit 332302f

29 files changed

+69363
-0
lines changed

LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2018 Zilong Huang
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# Pytorch-segmentation-toolbox [DOC](https://weiyc.github.io/assets/pdf/toolbox.pdf)
2+
PyTorch code for semantic segmentation. This is a minimal codebase for running PSPNet and DeepLabv3 on the Cityscapes dataset.
3+
Shortly afterwards, the code will be reviewed and reorganized for convenience.
4+
5+
### Highlights of Our Implementations
6+
- Synchronous BN
7+
- Shorter Training Time
8+
- Better Reproduced Performance
9+
10+
### Requirements && Install
11+
Python 3.7
12+
13+
4 x 12g GPUs (e.g. TITAN XP)
14+
15+
```bash
16+
# Install **Pytorch-1.1**
17+
$ conda install pytorch torchvision cudatoolkit=9.0 -c pytorch
18+
19+
# Install **Apex**
20+
$ git clone https://github.com/NVIDIA/apex
21+
$ cd apex
22+
$ pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./
23+
24+
# Install **Inplace-ABN**
25+
$ git clone https://github.com/mapillary/inplace_abn.git
26+
$ cd inplace_abn
27+
$ python setup.py install
28+
```
29+
30+
### Dataset and pretrained model
31+
32+
Please download the Cityscapes dataset and unzip it into `YOUR_CS_PATH`.
33+
34+
Please download MIT imagenet pretrained [resnet101-imagenet.pth](http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet101-imagenet.pth), and put it into `dataset` folder.
35+
36+
### Training and Evaluation
37+
```bash
38+
./run_local.sh YOUR_CS_PATH [pspnet|deeplabv3] 40000 769,769 0
39+
```
40+
41+
### Benefits
42+
Some recent projects have already benefited from our implementations. For example, [CCNet: Criss-Cross Attention for semantic segmentation](https://github.com/speedinghzl/CCNet) and [Object Context Network(OCNet)](https://github.com/PkuRainBow/OCNet) currently achieve state-of-the-art results on Cityscapes and ADE20K. In addition, our code also made great contributions to [Context Embedding with EdgePerceiving (CE2P)](https://github.com/liutinglt/CE2P), which won 1st place in all human parsing tracks of the 2nd LIP Challenge.
43+
44+
### Citing
45+
46+
If you find this code useful in your research, please consider citing:
47+
48+
@misc{huang2018torchseg,
49+
author = {Huang, Zilong and Wei, Yunchao and Wang, Xinggang and Liu, Wenyu},
50+
title = {A PyTorch Semantic Segmentation Toolbox},
51+
howpublished = {\url{https://github.com/speedinghzl/pytorch-segmentation-toolbox}},
52+
year = {2018}
53+
}
54+
55+
### Thanks to the Third Party Libs
56+
[inplace_abn](https://github.com/mapillary/inplace_abn) -
57+
[Pytorch-Deeplab](https://github.com/speedinghzl/Pytorch-Deeplab) -
58+
[PyTorch-Encoding](https://github.com/zhanghang1989/PyTorch-Encoding)

dataset/__init__.py

Whitespace-only changes.
161 Bytes
Binary file not shown.
10.1 KB
Binary file not shown.

dataset/datasets.py

Lines changed: 300 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,300 @@
1+
import os
2+
import os.path as osp
3+
import numpy as np
4+
import random
5+
import collections
6+
import torch
7+
import torchvision
8+
import cv2
9+
from torch.utils import data
10+
11+
12+
class VOCDataSet(data.Dataset):
    """PASCAL VOC style training set with scale, crop and mirror augmentation.

    Every non-blank line of ``list_path`` is an image id. The image is read
    from ``<root>/JPEGImages/<id>.jpg`` and the label map from
    ``<root>/SegmentationClassAug/<id>.png``.
    """

    def __init__(self, root, list_path, max_iters=None, crop_size=(321, 321), mean=(128, 128, 128), scale=True, mirror=True, ignore_label=255):
        """Build the sample index.

        Args:
            root: dataset root directory.
            list_path: text file with one image id per line.
            max_iters: if given, the id list is repeated until it holds at
                least this many entries.
            crop_size: ``(height, width)`` of the returned crops.
            mean: per-channel value subtracted from every pixel.
            scale: apply a random rescale before cropping.
            mirror: apply a random horizontal flip.
            ignore_label: label value used to pad the label map.
        """
        self.root = root
        self.list_path = list_path
        self.crop_h, self.crop_w = crop_size
        self.scale = scale
        self.ignore_label = ignore_label
        self.mean = mean
        self.is_mirror = mirror
        # Close the list file deterministically and drop blank lines, which
        # would otherwise turn into samples with an empty name.
        with open(list_path) as list_file:
            stripped = (line.strip() for line in list_file)
            self.img_ids = [i_id for i_id in stripped if i_id]
        if max_iters is not None and self.img_ids:
            repeats = int(np.ceil(float(max_iters) / len(self.img_ids)))
            self.img_ids = self.img_ids * repeats
        self.files = [
            {
                "img": osp.join(self.root, "JPEGImages/%s.jpg" % name),
                "label": osp.join(self.root, "SegmentationClassAug/%s.png" % name),
                "name": name,
            }
            for name in self.img_ids
        ]

    def __len__(self):
        """Return the number of (possibly repeated) samples."""
        return len(self.files)

    def generate_scale_label(self, image, label):
        """Rescale image and label by one random factor from 0.5 to 1.6 (step 0.1)."""
        f_scale = 0.5 + random.randint(0, 11) / 10.0
        # Nearest-neighbour for the label so no new class ids are interpolated.
        image = cv2.resize(image, None, fx=f_scale, fy=f_scale, interpolation=cv2.INTER_LINEAR)
        label = cv2.resize(label, None, fx=f_scale, fy=f_scale, interpolation=cv2.INTER_NEAREST)
        return image, label

    def __getitem__(self, index):
        """Load, augment and crop one sample.

        Returns:
            ``(image, label, size, name)``: ``image`` is a float32 array in
            channel-first layout, ``label`` a float32 ``crop_h x crop_w``
            array, ``size`` the shape of the image as read from disk, and
            ``name`` the image id.

        Raises:
            IOError: if the image or the label file cannot be read.
        """
        datafiles = self.files[index]
        image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR)
        label = cv2.imread(datafiles["label"], cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None or label is None:
            raise IOError("could not read image/label pair: %s, %s"
                          % (datafiles["img"], datafiles["label"]))
        size = image.shape  # recorded before any augmentation
        name = datafiles["name"]
        if self.scale:
            image, label = self.generate_scale_label(image, label)
        image = np.asarray(image, np.float32)
        image -= self.mean

        # Pad on the bottom/right so the sample is at least crop-sized.
        img_h, img_w = label.shape
        pad_h = max(self.crop_h - img_h, 0)
        pad_w = max(self.crop_w - img_w, 0)
        if pad_h > 0 or pad_w > 0:
            img_pad = cv2.copyMakeBorder(image, 0, pad_h, 0,
                                         pad_w, cv2.BORDER_CONSTANT,
                                         value=(0.0, 0.0, 0.0))
            label_pad = cv2.copyMakeBorder(label, 0, pad_h, 0,
                                           pad_w, cv2.BORDER_CONSTANT,
                                           value=(self.ignore_label,))
        else:
            img_pad, label_pad = image, label

        # Random crop window inside the padded sample.
        img_h, img_w = label_pad.shape
        h_off = random.randint(0, img_h - self.crop_h)
        w_off = random.randint(0, img_w - self.crop_w)
        image = np.asarray(img_pad[h_off: h_off + self.crop_h, w_off: w_off + self.crop_w], np.float32)
        label = np.asarray(label_pad[h_off: h_off + self.crop_h, w_off: w_off + self.crop_w], np.float32)
        image = image.transpose((2, 0, 1))  # HWC -> CHW
        if self.is_mirror:
            # flip is -1 (reverse the width axis) or +1 (leave as is).
            flip = np.random.choice(2) * 2 - 1
            image = image[:, :, ::flip]
            label = label[:, ::flip]

        # Copy so the returned arrays are contiguous rather than strided views.
        return image.copy(), label.copy(), np.array(size), name
82+
83+
84+
class VOCDataTestSet(data.Dataset):
    """PASCAL VOC style test set: mean-subtracted images, padded but not cropped.

    Every non-blank line of ``list_path`` is an image id; the image is read
    from ``<root>/JPEGImages/<id>.jpg``.
    """

    def __init__(self, root, list_path, crop_size=(505, 505), mean=(128, 128, 128)):
        """Build the sample index.

        Args:
            root: dataset root directory.
            list_path: text file with one image id per line.
            crop_size: minimum ``(height, width)``; smaller images are padded.
            mean: per-channel value subtracted from every pixel.
        """
        self.root = root
        self.list_path = list_path
        self.crop_h, self.crop_w = crop_size
        self.mean = mean
        # Close the list file deterministically and ignore blank lines.
        with open(list_path) as list_file:
            stripped = (line.strip() for line in list_file)
            self.img_ids = [i_id for i_id in stripped if i_id]
        self.files = [
            {"img": osp.join(self.root, "JPEGImages/%s.jpg" % name)}
            for name in self.img_ids
        ]

    def __len__(self):
        """Return the number of test images."""
        return len(self.files)

    def __getitem__(self, index):
        """Load one test image.

        Returns:
            ``(image, name, size)``: ``image`` is a float32 channel-first
            array, ``name`` the file name without extension, and ``size``
            the shape tuple of the image as read from disk.

        Raises:
            IOError: if the image file cannot be read.
        """
        datafiles = self.files[index]
        image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise IOError("could not read image: %s" % datafiles["img"])
        size = image.shape
        name = osp.splitext(osp.basename(datafiles["img"]))[0]
        image = np.asarray(image, np.float32)
        image -= self.mean

        # Pad on the bottom/right up to the configured minimum size.
        img_h, img_w, _ = image.shape
        pad_h = max(self.crop_h - img_h, 0)
        pad_w = max(self.crop_w - img_w, 0)
        if pad_h > 0 or pad_w > 0:
            image = cv2.copyMakeBorder(image, 0, pad_h, 0,
                                       pad_w, cv2.BORDER_CONSTANT,
                                       value=(0.0, 0.0, 0.0))
        image = image.transpose((2, 0, 1))  # HWC -> CHW
        return image, name, size
120+
121+
class CSDataSet(data.Dataset):
    """Cityscapes style training set with scale, crop and mirror augmentation.

    Every non-blank line of ``list_path`` holds two whitespace-separated
    paths, ``<image_path> <label_path>``, both relative to ``root``. Raw
    label ids are remapped to train ids through ``self.id_to_trainid``.
    """

    def __init__(self, root, list_path, max_iters=None, crop_size=(321, 321), mean=(128, 128, 128), scale=True, mirror=True, ignore_label=255):
        """Build the sample index and the id -> train-id table.

        Args:
            root: dataset root directory.
            list_path: text file with ``image_path label_path`` per line.
            max_iters: if given, the list is repeated until it holds at
                least this many entries.
            crop_size: ``(height, width)`` of the returned crops.
            mean: per-channel value subtracted from every pixel.
            scale: apply a random rescale before cropping.
            mirror: apply a random horizontal flip.
            ignore_label: value given to unused ids and to label padding.
        """
        self.root = root
        self.list_path = list_path
        self.crop_h, self.crop_w = crop_size
        self.scale = scale
        self.ignore_label = ignore_label
        self.mean = mean
        self.is_mirror = mirror
        # Close the list file deterministically; blank lines split to an
        # empty list and would break the two-field unpack below.
        with open(list_path) as list_file:
            fields = (line.strip().split() for line in list_file)
            self.img_ids = [item for item in fields if item]
        if max_iters is not None and self.img_ids:
            repeats = int(np.ceil(float(max_iters) / len(self.img_ids)))
            self.img_ids = self.img_ids * repeats
        self.files = []
        for item in self.img_ids:
            image_path, label_path = item
            self.files.append({
                "img": osp.join(self.root, image_path),
                "label": osp.join(self.root, label_path),
                # Samples are named after the label file.
                "name": osp.splitext(osp.basename(label_path))[0],
            })
        self.id_to_trainid = {-1: ignore_label, 0: ignore_label, 1: ignore_label, 2: ignore_label,
                              3: ignore_label, 4: ignore_label, 5: ignore_label, 6: ignore_label,
                              7: 0, 8: 1, 9: ignore_label, 10: ignore_label, 11: 2, 12: 3, 13: 4,
                              14: ignore_label, 15: ignore_label, 16: ignore_label, 17: 5,
                              18: ignore_label, 19: 6, 20: 7, 21: 8, 22: 9, 23: 10, 24: 11, 25: 12, 26: 13, 27: 14,
                              28: 15, 29: ignore_label, 30: ignore_label, 31: 16, 32: 17, 33: 18}
        print('{} images are loaded!'.format(len(self.img_ids)))

    def __len__(self):
        """Return the number of (possibly repeated) samples."""
        return len(self.files)

    def generate_scale_label(self, image, label):
        """Rescale image and label by one random factor from 0.7 to 2.1 (step 0.1)."""
        f_scale = 0.7 + random.randint(0, 14) / 10.0
        # Nearest-neighbour for the label so no new class ids are interpolated.
        image = cv2.resize(image, None, fx=f_scale, fy=f_scale, interpolation=cv2.INTER_LINEAR)
        label = cv2.resize(label, None, fx=f_scale, fy=f_scale, interpolation=cv2.INTER_NEAREST)
        return image, label

    def id2trainId(self, label, reverse=False):
        """Return a remapped copy of ``label``; the input is not modified.

        With ``reverse=False`` every raw id is replaced by its train id.
        With ``reverse=True`` train ids are mapped back to raw ids; several
        raw ids share ``ignore_label``, so for those pixels the last matching
        entry of ``id_to_trainid`` (in dict order) wins.
        """
        label_copy = label.copy()
        # Masks are always computed on the untouched input, so an earlier
        # assignment can never feed into a later comparison.
        if reverse:
            for v, k in self.id_to_trainid.items():
                label_copy[label == k] = v
        else:
            for k, v in self.id_to_trainid.items():
                label_copy[label == k] = v
        return label_copy

    def __getitem__(self, index):
        """Load, remap, augment and crop one sample.

        Returns:
            ``(image, label, size, name)``: ``image`` is a float32 array in
            channel-first layout, ``label`` a float32 ``crop_h x crop_w``
            array of train ids, ``size`` the shape of the image as read from
            disk, and ``name`` the label file name without extension.

        Raises:
            IOError: if the image or the label file cannot be read.
        """
        datafiles = self.files[index]
        image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR)
        label = cv2.imread(datafiles["label"], cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None or label is None:
            raise IOError("could not read image/label pair: %s, %s"
                          % (datafiles["img"], datafiles["label"]))
        label = self.id2trainId(label)
        size = image.shape  # recorded before any augmentation
        name = datafiles["name"]
        if self.scale:
            image, label = self.generate_scale_label(image, label)
        image = np.asarray(image, np.float32)
        image -= self.mean

        # Pad on the bottom/right so the sample is at least crop-sized.
        img_h, img_w = label.shape
        pad_h = max(self.crop_h - img_h, 0)
        pad_w = max(self.crop_w - img_w, 0)
        if pad_h > 0 or pad_w > 0:
            img_pad = cv2.copyMakeBorder(image, 0, pad_h, 0,
                                         pad_w, cv2.BORDER_CONSTANT,
                                         value=(0.0, 0.0, 0.0))
            label_pad = cv2.copyMakeBorder(label, 0, pad_h, 0,
                                           pad_w, cv2.BORDER_CONSTANT,
                                           value=(self.ignore_label,))
        else:
            img_pad, label_pad = image, label

        # Random crop window inside the padded sample.
        img_h, img_w = label_pad.shape
        h_off = random.randint(0, img_h - self.crop_h)
        w_off = random.randint(0, img_w - self.crop_w)
        image = np.asarray(img_pad[h_off: h_off + self.crop_h, w_off: w_off + self.crop_w], np.float32)
        label = np.asarray(label_pad[h_off: h_off + self.crop_h, w_off: w_off + self.crop_w], np.float32)
        image = image.transpose((2, 0, 1))  # HWC -> CHW
        if self.is_mirror:
            # flip is -1 (reverse the width axis) or +1 (leave as is).
            flip = np.random.choice(2) * 2 - 1
            image = image[:, :, ::flip]
            label = label[:, ::flip]

        # Copy so the returned arrays are contiguous rather than strided views.
        return image.copy(), label.copy(), np.array(size), name
211+
212+
213+
class CSDataTestSet(data.Dataset):
    """Cityscapes style test set: mean-subtracted images, padded but not cropped.

    NOTE: a second class with this same name is defined later in this module
    and replaces this one at import time, so this variant is only reachable
    if that later definition is renamed or removed.

    Every non-blank line of ``list_path`` starts with an image path relative
    to ``root``; any further fields on the line (e.g. a label path) are
    ignored.
    """

    def __init__(self, root, list_path, crop_size=(505, 505), mean=(128, 128, 128)):
        """Build the sample index.

        Args:
            root: dataset root directory.
            list_path: text file whose lines start with an image path.
            crop_size: minimum ``(height, width)``; smaller images are padded.
            mean: per-channel value subtracted from every pixel.
        """
        self.root = root
        self.list_path = list_path
        self.crop_h, self.crop_w = crop_size
        self.mean = mean
        # Close the list file deterministically and ignore blank lines.
        with open(list_path) as list_file:
            fields = (line.strip().split() for line in list_file)
            self.img_ids = [item for item in fields if item]
        # Only the image path is needed, so a label column is optional.
        self.files = [{"img": osp.join(self.root, item[0])} for item in self.img_ids]

    def __len__(self):
        """Return the number of test images."""
        return len(self.files)

    def __getitem__(self, index):
        """Load one test image.

        Returns:
            ``(image, name, size)``: ``image`` is a float32 channel-first
            array, ``name`` the file name without extension, and ``size``
            the shape tuple of the image as read from disk.

        Raises:
            IOError: if the image file cannot be read.
        """
        datafiles = self.files[index]
        image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise IOError("could not read image: %s" % datafiles["img"])
        size = image.shape
        name = osp.splitext(osp.basename(datafiles["img"]))[0]
        image = np.asarray(image, np.float32)
        image -= self.mean

        # Pad on the bottom/right up to the configured minimum size.
        img_h, img_w, _ = image.shape
        pad_h = max(self.crop_h - img_h, 0)
        pad_w = max(self.crop_w - img_w, 0)
        if pad_h > 0 or pad_w > 0:
            image = cv2.copyMakeBorder(image, 0, pad_h, 0,
                                       pad_w, cv2.BORDER_CONSTANT,
                                       value=(0.0, 0.0, 0.0))
        image = image.transpose((2, 0, 1))  # HWC -> CHW
        return image, name, size
251+
252+
class CSDataTestSet(data.Dataset):
    """Cityscapes style test set: half-resolution, min-max normalised images.

    Every non-blank line of ``list_path`` starts with an image path relative
    to ``root``; any further fields on the line are ignored.
    """

    def __init__(self, root, list_path, crop_size=(505, 505)):
        """Build the sample index.

        Args:
            root: dataset root directory.
            list_path: text file whose lines start with an image path.
            crop_size: minimum ``(height, width)``; smaller images are padded.
        """
        self.root = root
        self.list_path = list_path
        self.crop_h, self.crop_w = crop_size
        # Close the list file deterministically; a blank line has no first
        # field, so it is skipped rather than raising IndexError.
        with open(list_path) as list_file:
            fields = (line.strip().split() for line in list_file)
            self.img_ids = [item[0] for item in fields if item]
        self.files = [{"img": osp.join(self.root, image_path)} for image_path in self.img_ids]

    def __len__(self):
        """Return the number of test images."""
        return len(self.files)

    @staticmethod
    def _min_max_normalize(image):
        """Rescale ``image`` linearly so its minimum is 0 and its maximum is 1.

        A constant image has zero range; it is returned as all zeros instead
        of dividing by zero and producing NaNs.
        """
        lowest = image.min()
        span = image.max() - lowest
        if span == 0:
            return np.zeros_like(image)
        return (image - lowest) / span

    def __getitem__(self, index):
        """Load one test image.

        Returns:
            ``(image, size, name)``: ``image`` is a float32 channel-first
            array normalised to the 0..1 range, ``size`` the shape of the
            half-resolution image as an array, and ``name`` the file name
            without extension.

        Raises:
            IOError: if the image file cannot be read.
        """
        datafiles = self.files[index]
        image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise IOError("could not read image: %s" % datafiles["img"])
        image = cv2.resize(image, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_LINEAR)
        size = image.shape  # shape after the 0.5x resize
        name = osp.splitext(osp.basename(datafiles["img"]))[0]
        image = np.asarray(image, np.float32)
        image = self._min_max_normalize(image)

        # Pad on the bottom/right up to the configured minimum size.
        img_h, img_w, _ = image.shape
        pad_h = max(self.crop_h - img_h, 0)
        pad_w = max(self.crop_w - img_w, 0)
        if pad_h > 0 or pad_w > 0:
            image = cv2.copyMakeBorder(image, 0, pad_h, 0,
                                       pad_w, cv2.BORDER_CONSTANT,
                                       value=(0.0, 0.0, 0.0))
        image = image.transpose((2, 0, 1))  # HWC -> CHW
        return image, np.array(size), name
289+
290+
if __name__ == '__main__':
    # Visual smoke test: display the first batch of augmented VOC crops.
    # Usage: python datasets.py VOC_ROOT LIST_PATH
    import sys

    if len(sys.argv) != 3:
        sys.exit("usage: python datasets.py VOC_ROOT LIST_PATH")
    voc_root, voc_list = sys.argv[1], sys.argv[2]
    demo_mean = (128, 128, 128)

    dst = VOCDataSet(voc_root, voc_list, mean=demo_mean)
    trainloader = data.DataLoader(dst, batch_size=4)
    # The loop variable must not be called `data`: that would shadow the
    # torch.utils.data module used just above.
    for i, batch in enumerate(trainloader):
        # Each sample is (image, label, original size, name).
        imgs, labels, sizes, names = batch
        if i == 0:
            img = torchvision.utils.make_grid(imgs).numpy()
            img = np.transpose(img, (1, 2, 0))  # CHW -> HWC
            # Undo the mean subtraction so the grid is a displayable 8-bit
            # image. Pixels stay in the channel order cv2.imread produced,
            # which is what cv2.imshow expects, so no channel swap is needed.
            img = np.clip(img + np.asarray(demo_mean, np.float32), 0, 255)
            img = np.ascontiguousarray(img.astype(np.uint8))
            cv2.imshow("VOCDataSet batch", img)
            cv2.waitKey(0)
            break  # only the first batch is shown

0 commit comments

Comments
 (0)