speedinghzl
diff --git a/‎LICENSE‎
Lines changed: 21 additions & 0 deletions b/‎LICENSE‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 58 additions & 0 deletions b/‎README.md‎
Lines changed: 58 additions & 0 deletions
diff --git a/‎dataset/__init__.py‎ b/‎dataset/__init__.py‎
diff --git a/‎dataset/__pycache__/__init__.cpython-37.pyc‎
161 Bytes b/‎dataset/__pycache__/__init__.cpython-37.pyc‎
161 Bytes
diff --git a/‎dataset/__pycache__/datasets.cpython-37.pyc‎
10.1 KB b/‎dataset/__pycache__/datasets.cpython-37.pyc‎
10.1 KB
diff --git a/‎dataset/datasets.py‎
Lines changed: 300 additions & 0 deletions b/‎dataset/datasets.py‎
Lines changed: 300 additions & 0 deletions
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2018 Zilong Huang
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -0,0 +1,58 @@
+# Pytorch-segmentation-toolbox [DOC](https://weiyc.github.io/assets/pdf/toolbox.pdf)
+PyTorch code for semantic segmentation. This is minimal code to run PSPNet and DeepLabv3 on the Cityscapes dataset.
+Shortly afterwards, the code will be reviewed and reorganized for convenience.
+
+### Highlights of Our Implementations
+- Synchronous BN
+- Less Training Time
+- Better Reproduced Performance
+
+### Requirements && Install
+Python 3.7
+
+4 x 12g GPUs (e.g. TITAN XP)
+
+```bash
+# Install **Pytorch-1.1**
+$ conda install pytorch torchvision cudatoolkit=9.0 -c pytorch
+
+# Install **Apex**
+$ git clone https://github.com/NVIDIA/apex
+$ cd apex
+$ pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./
+
+# Install **Inplace-ABN**
+$ git clone https://github.com/mapillary/inplace_abn.git
+$ cd inplace_abn
+$ python setup.py install
+```
+
+### Dataset and pretrained model
+
+Please download the Cityscapes dataset and unzip it into `YOUR_CS_PATH`.
+
+Please download MIT imagenet pretrained [resnet101-imagenet.pth](http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet101-imagenet.pth), and put it into `dataset` folder.
+
+### Training and Evaluation
+```bash
+./run_local.sh YOUR_CS_PATH [pspnet|deeplabv3] 40000 769,769 0
+``` 
+
+### Benefits
+Some recent projects have already benefited from our implementations. For example, [CCNet: Criss-Cross Attention for semantic segmentation](https://github.com/speedinghzl/CCNet) and [Object Context Network (OCNet)](https://github.com/PkuRainBow/OCNet) currently achieve state-of-the-art results on Cityscapes and ADE20K. In addition, our code also makes great contributions to [Context Embedding with Edge Perceiving (CE2P)](https://github.com/liutinglt/CE2P), which won 1st place in all human parsing tracks in the 2nd LIP Challenge.
+
+### Citing
+
+If you find this code useful in your research, please consider citing:
+
+    @misc{huang2018torchseg,
+      author = {Huang, Zilong and Wei, Yunchao and Wang, Xinggang and Liu, Wenyu},
+      title = {A PyTorch Semantic Segmentation Toolbox},
+      howpublished = {\url{https://github.com/speedinghzl/pytorch-segmentation-toolbox}},
+      year = {2018}
+    }
+
+### Thanks to the Third Party Libs
+[inplace_abn](https://github.com/mapillary/inplace_abn) - 
+[Pytorch-Deeplab](https://github.com/speedinghzl/Pytorch-Deeplab) - 
+[PyTorch-Encoding](https://github.com/zhanghang1989/PyTorch-Encoding)
@@ -0,0 +1,300 @@
+import os
+import os.path as osp
+import numpy as np
+import random
+import collections
+import torch
+import torchvision
+import cv2
+from torch.utils import data
+
+
+class VOCDataSet(data.Dataset):
+    """PASCAL VOC style training set that yields random fixed-size crops.
+
+    Every non-blank line of ``list_path`` is an image id.  The image is read
+    from ``<root>/JPEGImages/<id>.jpg`` and its label map from
+    ``<root>/SegmentationClassAug/<id>.png``.
+
+    Args:
+        root: Dataset root directory.
+        list_path: Text file holding one image id per line.
+        max_iters: If given, the id list is repeated until it holds at least
+            this many entries.
+        crop_size: ``(height, width)`` of the returned crop.
+        mean: Per-channel value subtracted from the image, in the channel
+            order produced by ``cv2.imread``.
+        scale: Randomly rescale image and label before cropping.
+        mirror: Randomly flip image and label horizontally.
+        ignore_label: Label value used to pad label maps that are smaller
+            than the crop.
+    """
+
+    def __init__(self, root, list_path, max_iters=None, crop_size=(321, 321), mean=(128, 128, 128), scale=True, mirror=True, ignore_label=255):
+        self.root = root
+        self.list_path = list_path
+        self.crop_h, self.crop_w = crop_size
+        self.scale = scale
+        self.ignore_label = ignore_label
+        self.mean = mean
+        self.is_mirror = mirror
+        # Close the list file deterministically and skip blank lines so that a
+        # trailing newline does not become a bogus empty image id.
+        with open(list_path) as list_file:
+            self.img_ids = [line.strip() for line in list_file if line.strip()]
+        if max_iters is not None and self.img_ids:
+            # Repeat the id list so that it covers at least max_iters samples.
+            self.img_ids = self.img_ids * int(np.ceil(float(max_iters) / len(self.img_ids)))
+        self.files = [
+            {
+                "img": osp.join(self.root, "JPEGImages/%s.jpg" % name),
+                "label": osp.join(self.root, "SegmentationClassAug/%s.png" % name),
+                "name": name,
+            }
+            for name in self.img_ids
+        ]
+
+    def __len__(self):
+        """Return the number of samples, counting repetitions from ``max_iters``."""
+        return len(self.files)
+
+    @staticmethod
+    def _read(path, flags):
+        """Read ``path`` with ``cv2.imread`` and fail loudly if it is unreadable."""
+        array = cv2.imread(path, flags)
+        if array is None:
+            # cv2.imread reports a missing or undecodable file by returning None.
+            raise IOError("failed to read image file: {}".format(path))
+        return array
+
+    def generate_scale_label(self, image, label):
+        """Rescale ``image`` and ``label`` by one random factor in [0.5, 1.6].
+
+        The factor is drawn in steps of 0.1.  The image is resized with
+        bilinear interpolation and the label with nearest-neighbour
+        interpolation, so no new label values are introduced.
+        """
+        f_scale = 0.5 + random.randint(0, 11) / 10.0
+        image = cv2.resize(image, None, fx=f_scale, fy=f_scale, interpolation=cv2.INTER_LINEAR)
+        label = cv2.resize(label, None, fx=f_scale, fy=f_scale, interpolation=cv2.INTER_NEAREST)
+        return image, label
+
+    def __getitem__(self, index):
+        """Load sample ``index`` and return a randomly augmented crop.
+
+        Returns:
+            tuple: ``(image, label, size, name)``.  ``image`` is a float32
+            array with the channel axis first, ``label`` is a float32 array of
+            shape ``(crop_h, crop_w)``, ``size`` is the shape of the image as
+            read from disk and ``name`` is the image id.
+
+        Raises:
+            IOError: If the image or the label file cannot be read.
+        """
+        datafiles = self.files[index]
+        image = self._read(datafiles["img"], cv2.IMREAD_COLOR)
+        label = self._read(datafiles["label"], cv2.IMREAD_GRAYSCALE)
+        size = image.shape
+        name = datafiles["name"]
+        if self.scale:
+            image, label = self.generate_scale_label(image, label)
+        image = np.asarray(image, np.float32)
+        image -= self.mean
+
+        # Pad on the bottom/right so that a full crop always fits.  The image
+        # is padded with zeros (the mean, after subtraction) and the label
+        # with ignore_label.
+        img_h, img_w = label.shape
+        pad_h = max(self.crop_h - img_h, 0)
+        pad_w = max(self.crop_w - img_w, 0)
+        if pad_h > 0 or pad_w > 0:
+            img_pad = cv2.copyMakeBorder(image, 0, pad_h, 0, pad_w,
+                                         cv2.BORDER_CONSTANT, value=(0.0, 0.0, 0.0))
+            label_pad = cv2.copyMakeBorder(label, 0, pad_h, 0, pad_w,
+                                           cv2.BORDER_CONSTANT, value=(self.ignore_label,))
+        else:
+            img_pad, label_pad = image, label
+
+        # Pick the top-left corner of the crop uniformly at random.
+        img_h, img_w = label_pad.shape
+        h_off = random.randint(0, img_h - self.crop_h)
+        w_off = random.randint(0, img_w - self.crop_w)
+        image = np.asarray(img_pad[h_off:h_off + self.crop_h, w_off:w_off + self.crop_w], np.float32)
+        label = np.asarray(label_pad[h_off:h_off + self.crop_h, w_off:w_off + self.crop_w], np.float32)
+        image = image.transpose((2, 0, 1))  # HWC -> CHW
+        if self.is_mirror:
+            # A step of -1 reverses the width axis; a step of +1 is a no-op.
+            flip = np.random.choice(2) * 2 - 1
+            image = image[:, :, ::flip]
+            label = label[:, ::flip]
+
+        # Copy so that the returned arrays are contiguous and own their data.
+        return image.copy(), label.copy(), np.array(size), name
+
+
+class VOCDataTestSet(data.Dataset):
+    """PASCAL VOC style test set that yields whole, mean-subtracted images.
+
+    Every non-blank line of ``list_path`` is an image id; the image is read
+    from ``<root>/JPEGImages/<id>.jpg``.  No labels are loaded.
+
+    Args:
+        root: Dataset root directory.
+        list_path: Text file holding one image id per line.
+        crop_size: Minimum ``(height, width)``; smaller images are zero-padded
+            up to this size, larger images are returned unchanged.
+        mean: Per-channel value subtracted from the image, in the channel
+            order produced by ``cv2.imread``.
+    """
+
+    def __init__(self, root, list_path, crop_size=(505, 505), mean=(128, 128, 128)):
+        self.root = root
+        self.list_path = list_path
+        self.crop_h, self.crop_w = crop_size
+        self.mean = mean
+        # Close the list file deterministically and skip blank lines so that a
+        # trailing newline does not become a bogus empty image id.
+        with open(list_path) as list_file:
+            self.img_ids = [line.strip() for line in list_file if line.strip()]
+        self.files = [
+            {"img": osp.join(self.root, "JPEGImages/%s.jpg" % name)}
+            for name in self.img_ids
+        ]
+
+    def __len__(self):
+        """Return the number of images listed in ``list_path``."""
+        return len(self.files)
+
+    def __getitem__(self, index):
+        """Load image ``index``.
+
+        Returns:
+            tuple: ``(image, name, size)``.  ``image`` is a float32 array with
+            the channel axis first, ``name`` is the file name without its
+            extension and ``size`` is the shape tuple of the image as read
+            from disk.
+
+        Raises:
+            IOError: If the image file cannot be read.
+        """
+        path = self.files[index]["img"]
+        image = cv2.imread(path, cv2.IMREAD_COLOR)
+        if image is None:
+            # cv2.imread reports a missing or undecodable file by returning None.
+            raise IOError("failed to read image file: {}".format(path))
+        size = image.shape
+        name = osp.splitext(osp.basename(path))[0]
+        image = np.asarray(image, np.float32)
+        image -= self.mean
+
+        # Zero-pad on the bottom/right up to the configured minimum size.
+        img_h, img_w, _ = image.shape
+        pad_h = max(self.crop_h - img_h, 0)
+        pad_w = max(self.crop_w - img_w, 0)
+        if pad_h > 0 or pad_w > 0:
+            image = cv2.copyMakeBorder(image, 0, pad_h, 0, pad_w,
+                                       cv2.BORDER_CONSTANT, value=(0.0, 0.0, 0.0))
+        image = image.transpose((2, 0, 1))  # HWC -> CHW
+        return image, name, size
+
+class CSDataSet(data.Dataset):
+    """Cityscapes training set that yields random fixed-size crops.
+
+    Every non-blank line of ``list_path`` holds two whitespace-separated
+    paths, both relative to ``root``: the image and its label map.  Raw label
+    ids are remapped to train ids through ``id_to_trainid``.
+
+    Args:
+        root: Dataset root directory.
+        list_path: Text file with ``<image_path> <label_path>`` per line.
+        max_iters: If given, the sample list is repeated until it holds at
+            least this many entries.
+        crop_size: ``(height, width)`` of the returned crop.
+        mean: Per-channel value subtracted from the image, in the channel
+            order produced by ``cv2.imread``.
+        scale: Randomly rescale image and label before cropping.
+        mirror: Randomly flip image and label horizontally.
+        ignore_label: Train id given to unmapped classes and to padding.
+
+    Raises:
+        ValueError: If a non-blank line does not hold exactly two fields.
+    """
+
+    def __init__(self, root, list_path, max_iters=None, crop_size=(321, 321), mean=(128, 128, 128), scale=True, mirror=True, ignore_label=255):
+        self.root = root
+        self.list_path = list_path
+        self.crop_h, self.crop_w = crop_size
+        self.scale = scale
+        self.ignore_label = ignore_label
+        self.mean = mean
+        self.is_mirror = mirror
+        # Close the list file deterministically and skip blank lines, which
+        # would otherwise fail the two-field unpacking below.
+        with open(list_path) as list_file:
+            self.img_ids = [line.split() for line in list_file if line.strip()]
+        if max_iters is not None and self.img_ids:
+            # Repeat the sample list so that it covers at least max_iters samples.
+            self.img_ids = self.img_ids * int(np.ceil(float(max_iters) / len(self.img_ids)))
+        self.files = []
+        for item in self.img_ids:
+            image_path, label_path = item
+            self.files.append({
+                "img": osp.join(self.root, image_path),
+                "label": osp.join(self.root, label_path),
+                # Samples are named after the label file, without extension.
+                "name": osp.splitext(osp.basename(label_path))[0],
+            })
+        # Raw label id -> train id; classes that are not trained on map to
+        # ignore_label.
+        self.id_to_trainid = {-1: ignore_label, 0: ignore_label, 1: ignore_label, 2: ignore_label,
+                              3: ignore_label, 4: ignore_label, 5: ignore_label, 6: ignore_label,
+                              7: 0, 8: 1, 9: ignore_label, 10: ignore_label, 11: 2, 12: 3, 13: 4,
+                              14: ignore_label, 15: ignore_label, 16: ignore_label, 17: 5,
+                              18: ignore_label, 19: 6, 20: 7, 21: 8, 22: 9, 23: 10, 24: 11, 25: 12, 26: 13, 27: 14,
+                              28: 15, 29: ignore_label, 30: ignore_label, 31: 16, 32: 17, 33: 18}
+        print('{} images are loaded!'.format(len(self.img_ids)))
+
+    def __len__(self):
+        """Return the number of samples, counting repetitions from ``max_iters``."""
+        return len(self.files)
+
+    @staticmethod
+    def _read(path, flags):
+        """Read ``path`` with ``cv2.imread`` and fail loudly if it is unreadable."""
+        array = cv2.imread(path, flags)
+        if array is None:
+            # cv2.imread reports a missing or undecodable file by returning None.
+            raise IOError("failed to read image file: {}".format(path))
+        return array
+
+    def generate_scale_label(self, image, label):
+        """Rescale ``image`` and ``label`` by one random factor in [0.7, 2.1].
+
+        The factor is drawn in steps of 0.1.  The image is resized with
+        bilinear interpolation and the label with nearest-neighbour
+        interpolation, so no new label values are introduced.
+        """
+        f_scale = 0.7 + random.randint(0, 14) / 10.0
+        image = cv2.resize(image, None, fx=f_scale, fy=f_scale, interpolation=cv2.INTER_LINEAR)
+        label = cv2.resize(label, None, fx=f_scale, fy=f_scale, interpolation=cv2.INTER_NEAREST)
+        return image, label
+
+    def id2trainId(self, label, reverse=False):
+        """Return a copy of ``label`` remapped through ``id_to_trainid``.
+
+        Args:
+            label: Array of raw ids (or of train ids when ``reverse`` is set).
+            reverse: Map train ids back to raw ids instead.  Several raw ids
+                share ``ignore_label``, so for that value the last matching
+                entry of the table wins.
+
+        Returns:
+            A new array; ``label`` itself is not modified.
+        """
+        label_copy = label.copy()
+        # Masks are always computed on the untouched input, so an already
+        # remapped pixel can never be remapped a second time.
+        if reverse:
+            for raw_id, train_id in self.id_to_trainid.items():
+                label_copy[label == train_id] = raw_id
+        else:
+            for raw_id, train_id in self.id_to_trainid.items():
+                label_copy[label == raw_id] = train_id
+        return label_copy
+
+    def __getitem__(self, index):
+        """Load sample ``index`` and return a randomly augmented crop.
+
+        Returns:
+            tuple: ``(image, label, size, name)``.  ``image`` is a float32
+            array with the channel axis first, ``label`` is a float32 array of
+            train ids with shape ``(crop_h, crop_w)``, ``size`` is the shape
+            of the image as read from disk and ``name`` is the label file name
+            without its extension.
+
+        Raises:
+            IOError: If the image or the label file cannot be read.
+        """
+        datafiles = self.files[index]
+        image = self._read(datafiles["img"], cv2.IMREAD_COLOR)
+        label = self._read(datafiles["label"], cv2.IMREAD_GRAYSCALE)
+        label = self.id2trainId(label)
+        size = image.shape
+        name = datafiles["name"]
+        if self.scale:
+            image, label = self.generate_scale_label(image, label)
+        image = np.asarray(image, np.float32)
+        image -= self.mean
+
+        # Pad on the bottom/right so that a full crop always fits.  The image
+        # is padded with zeros (the mean, after subtraction) and the label
+        # with ignore_label.
+        img_h, img_w = label.shape
+        pad_h = max(self.crop_h - img_h, 0)
+        pad_w = max(self.crop_w - img_w, 0)
+        if pad_h > 0 or pad_w > 0:
+            img_pad = cv2.copyMakeBorder(image, 0, pad_h, 0, pad_w,
+                                         cv2.BORDER_CONSTANT, value=(0.0, 0.0, 0.0))
+            label_pad = cv2.copyMakeBorder(label, 0, pad_h, 0, pad_w,
+                                           cv2.BORDER_CONSTANT, value=(self.ignore_label,))
+        else:
+            img_pad, label_pad = image, label
+
+        # Pick the top-left corner of the crop uniformly at random.
+        img_h, img_w = label_pad.shape
+        h_off = random.randint(0, img_h - self.crop_h)
+        w_off = random.randint(0, img_w - self.crop_w)
+        image = np.asarray(img_pad[h_off:h_off + self.crop_h, w_off:w_off + self.crop_w], np.float32)
+        label = np.asarray(label_pad[h_off:h_off + self.crop_h, w_off:w_off + self.crop_w], np.float32)
+        image = image.transpose((2, 0, 1))  # HWC -> CHW
+        if self.is_mirror:
+            # A step of -1 reverses the width axis; a step of +1 is a no-op.
+            flip = np.random.choice(2) * 2 - 1
+            image = image[:, :, ::flip]
+            label = label[:, ::flip]
+
+        # Copy so that the returned arrays are contiguous and own their data.
+        return image.copy(), label.copy(), np.array(size), name
+
+
+class CSDataTestSet(data.Dataset):
+    """Cityscapes test set that yields whole, mean-subtracted images.
+
+    NOTE(review): a second class named ``CSDataTestSet`` is defined further
+    down in this module and rebinds the name, so after import this definition
+    is no longer reachable as ``CSDataTestSet``.  Confirm which of the two is
+    meant to survive.
+
+    Every non-blank line of ``list_path`` holds whitespace-separated paths
+    relative to ``root``; only the first one (the image) is used.
+
+    Args:
+        root: Dataset root directory.
+        list_path: Text file whose lines start with an image path.
+        crop_size: Minimum ``(height, width)``; smaller images are zero-padded
+            up to this size, larger images are returned unchanged.
+        mean: Per-channel value subtracted from the image, in the channel
+            order produced by ``cv2.imread``.
+    """
+
+    def __init__(self, root, list_path, crop_size=(505, 505), mean=(128, 128, 128)):
+        self.root = root
+        self.list_path = list_path
+        self.crop_h, self.crop_w = crop_size
+        self.mean = mean
+        # Close the list file deterministically and skip blank lines.
+        with open(list_path) as list_file:
+            self.img_ids = [line.split() for line in list_file if line.strip()]
+        # Only the image path is needed at test time; a label column, if
+        # present, is ignored.
+        self.files = [{"img": osp.join(self.root, item[0])} for item in self.img_ids]
+
+    def __len__(self):
+        """Return the number of images listed in ``list_path``."""
+        return len(self.files)
+
+    def __getitem__(self, index):
+        """Load image ``index``.
+
+        Returns:
+            tuple: ``(image, name, size)``.  ``image`` is a float32 array with
+            the channel axis first, ``name`` is the image file name without
+            its extension and ``size`` is the shape tuple of the image as read
+            from disk.
+
+        Raises:
+            IOError: If the image file cannot be read.
+        """
+        path = self.files[index]["img"]
+        image = cv2.imread(path, cv2.IMREAD_COLOR)
+        if image is None:
+            # cv2.imread reports a missing or undecodable file by returning None.
+            raise IOError("failed to read image file: {}".format(path))
+        size = image.shape
+        name = osp.splitext(osp.basename(path))[0]
+        image = np.asarray(image, np.float32)
+        image -= self.mean
+
+        # Zero-pad on the bottom/right up to the configured minimum size.
+        img_h, img_w, _ = image.shape
+        pad_h = max(self.crop_h - img_h, 0)
+        pad_w = max(self.crop_w - img_w, 0)
+        if pad_h > 0 or pad_w > 0:
+            image = cv2.copyMakeBorder(image, 0, pad_h, 0, pad_w,
+                                       cv2.BORDER_CONSTANT, value=(0.0, 0.0, 0.0))
+        image = image.transpose((2, 0, 1))  # HWC -> CHW
+        return image, name, size
+
+class CSDataTestSet(data.Dataset):
+    """Cityscapes test set that yields half-resolution, min-max scaled images.
+
+    Every non-blank line of ``list_path`` starts with an image path relative
+    to ``root``; any further fields on the line are ignored.
+
+    Args:
+        root: Dataset root directory.
+        list_path: Text file whose lines start with an image path.
+        crop_size: Minimum ``(height, width)``; smaller images are zero-padded
+            up to this size, larger images are returned unchanged.
+    """
+
+    def __init__(self, root, list_path, crop_size=(505, 505)):
+        self.root = root
+        self.list_path = list_path
+        self.crop_h, self.crop_w = crop_size
+        # Close the list file deterministically and skip blank lines, which
+        # would otherwise raise IndexError on the [0] lookup.
+        with open(list_path) as list_file:
+            self.img_ids = [line.split()[0] for line in list_file if line.strip()]
+        self.files = [{"img": osp.join(self.root, image_path)} for image_path in self.img_ids]
+
+    def __len__(self):
+        """Return the number of images listed in ``list_path``."""
+        return len(self.files)
+
+    @staticmethod
+    def _min_max_normalize(image):
+        """Scale ``image`` linearly so that its values span [0, 1].
+
+        A constant image has no range to stretch; it is mapped to all zeros
+        instead of dividing by zero and producing NaNs.
+        """
+        lowest = image.min()
+        span = image.max() - lowest
+        if span == 0:
+            return np.zeros_like(image)
+        return (image - lowest) / span
+
+    def __getitem__(self, index):
+        """Load image ``index`` at half resolution.
+
+        Returns:
+            tuple: ``(image, size, name)``.  ``image`` is a float32 array with
+            the channel axis first and values scaled to [0, 1], ``size`` is an
+            array holding the shape of the image after the 0.5x resize and
+            ``name`` is the image file name without its extension.
+
+        Raises:
+            IOError: If the image file cannot be read.
+        """
+        path = self.files[index]["img"]
+        image = cv2.imread(path, cv2.IMREAD_COLOR)
+        if image is None:
+            # cv2.imread reports a missing or undecodable file by returning None.
+            raise IOError("failed to read image file: {}".format(path))
+        image = cv2.resize(image, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_LINEAR)
+        size = image.shape
+        name = osp.splitext(osp.basename(path))[0]
+        image = self._min_max_normalize(np.asarray(image, np.float32))
+
+        # Zero-pad on the bottom/right up to the configured minimum size.
+        img_h, img_w, _ = image.shape
+        pad_h = max(self.crop_h - img_h, 0)
+        pad_w = max(self.crop_w - img_w, 0)
+        if pad_h > 0 or pad_w > 0:
+            image = cv2.copyMakeBorder(image, 0, pad_h, 0, pad_w,
+                                       cv2.BORDER_CONSTANT, value=(0.0, 0.0, 0.0))
+        image = image.transpose((2, 0, 1))  # HWC -> CHW
+        return image, np.array(size), name
+
+if __name__ == '__main__':
+    # Smoke test: load the first batch of a VOC-style dataset and display it
+    # as an image grid.
+    import sys
+
+    if len(sys.argv) != 3:
+        sys.exit("usage: python datasets.py VOC_ROOT LIST_PATH")
+    dst = VOCDataSet(sys.argv[1], sys.argv[2])
+    trainloader = data.DataLoader(dst, batch_size=4)
+    # Each sample is an (image, label, size, name) tuple.  The loop names
+    # deliberately avoid ``data`` so the torch.utils.data module imported at
+    # the top of the file is not shadowed.
+    for imgs, labels, sizes, names in trainloader:
+        img = torchvision.utils.make_grid(imgs).numpy()
+        img = np.transpose(img, (1, 2, 0))  # CHW -> HWC
+        # Undo the mean subtraction so the grid is viewable.  The channel
+        # order is left as loaded by cv2.imread, which cv2.imshow expects.
+        img = np.clip(img + np.asarray(dst.mean, np.float32), 0, 255).astype(np.uint8)
+        cv2.imshow("batch", np.ascontiguousarray(img))
+        cv2.waitKey(0)
+        # Only the first batch is shown.
+        break