|
1 | 1 | import os |
2 | | -import random |
3 | | -import numpy as np |
| 2 | +import json |
4 | 3 | import torch |
5 | | -import torch.utils.data as torchdata |
| 4 | +import lib.utils.data as torchdata |
| 5 | +import cv2 |
6 | 6 | from torchvision import transforms |
7 | 7 | from scipy.misc import imread, imresize |
| 8 | +import numpy as np |
8 | 9 |
|
| 10 | +# Round x up to the nearest multiple of p, i.e. the smallest x' with x' >= x and p | x' |
| 11 | +def round2nearest_multiple(x, p): |
| 12 | + return ((x - 1) // p + 1) * p |
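| | +# quick sanity examples (plain arithmetic, nothing assumed beyond the code above): |
| | +# round2nearest_multiple(33, 8) -> 40; round2nearest_multiple(32, 8) -> 32 (exact multiples pass through) |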
9 | 13 |
|
10 | | -class Dataset(torchdata.Dataset): |
11 | | - def __init__(self, txt, opt, max_sample=-1, is_train=1): |
12 | | - self.root_img = opt.root_img |
13 | | - self.root_seg = opt.root_seg |
| 14 | +class TrainDataset(torchdata.Dataset): |
| 15 | + def __init__(self, odgt, opt, max_sample=-1, batch_per_gpu=1): |
| 16 | + self.root_dataset = opt.root_dataset |
14 | 17 | self.imgSize = opt.imgSize |
15 | | - self.segSize = opt.segSize |
16 | | - self.is_train = is_train |
| 18 | + self.imgMaxSize = opt.imgMaxSize |
| 19 | + self.random_flip = opt.random_flip |
| 20 | + # max downsampling rate of the network, to avoid rounding during conv or pooling |
| 21 | + self.padding_constant = opt.padding_constant |
| 22 | + # downsampling rate of the segm label |
| 23 | + self.segm_downsampling_rate = opt.segm_downsampling_rate |
| 24 | + self.batch_per_gpu = batch_per_gpu |
| 25 | + |
| 26 | + # classify images into two classes: 1. h > w and 2. h <= w |
| 27 | + self.batch_record_list = [[], []] |
| 28 | + |
| 29 | + # override dataset length when training with batch_per_gpu > 1 |
| 30 | + self.cur_idx = 0 |
17 | 31 |
|
18 | 32 | # mean and std |
19 | 33 | self.img_transform = transforms.Compose([ |
20 | | - transforms.Normalize(mean=[0.485, 0.456, 0.406], |
21 | | - std=[0.229, 0.224, 0.225])]) |
| 34 | + transforms.Normalize(mean=[102.9801, 115.9465, 122.7717], std=[1., 1., 1.]) |
| 35 | + ]) |
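| | + # (note: these appear to be Caffe-style BGR pixel means with std left at 1; |
| | + # the RGB -> BGR flip applied to images below matches that channel order) |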
22 | 36 |
|
23 | | - self.list_sample = [x.rstrip() for x in open(txt, 'r')] |
| 37 | + self.list_sample = [json.loads(x.rstrip()) for x in open(odgt, 'r')] |
24 | 38 |
|
25 | | - if self.is_train: |
26 | | - random.shuffle(self.list_sample) |
| 39 | + np.random.shuffle(self.list_sample) |
27 | 40 | if max_sample > 0: |
28 | 41 | self.list_sample = self.list_sample[0:max_sample] |
29 | | - num_sample = len(self.list_sample) |
30 | | - assert num_sample > 0 |
31 | | - print('# samples: {}'.format(num_sample)) |
| 42 | + self.num_sample = len(self.list_sample) |
| 43 | + assert self.num_sample > 0 |
| 44 | + print('# samples: {}'.format(self.num_sample)) |
| 45 | + |
| 46 | + def _get_sub_batch(self): |
| 47 | + while True: |
| 48 | + # get a sample record |
| 49 | + this_sample = self.list_sample[self.cur_idx] |
| 50 | + if this_sample['height'] > this_sample['width']: |
| 51 | + self.batch_record_list[0].append(this_sample) # h > w, go to 1st class |
| 52 | + else: |
| 53 | + self.batch_record_list[1].append(this_sample) # h <= w, go to 2nd class |
| 54 | + |
| 55 | + # update current sample pointer |
| 56 | + self.cur_idx += 1 |
| 57 | + if self.cur_idx >= self.num_sample: |
| 58 | + self.cur_idx = 0 |
| 59 | + np.random.shuffle(self.list_sample) |
| 60 | + |
| 61 | + if len(self.batch_record_list[0]) == self.batch_per_gpu: |
| 62 | + batch_records = self.batch_record_list[0] |
| 63 | + self.batch_record_list[0] = [] |
| 64 | + break |
| 65 | + elif len(self.batch_record_list[1]) == self.batch_per_gpu: |
| 66 | + batch_records = self.batch_record_list[1] |
| 67 | + self.batch_record_list[1] = [] |
| 68 | + break |
| 69 | + return batch_records |
32 | 70 |
|
33 | | - def _scale_and_crop(self, img, seg, cropSize, is_train): |
34 | | - h, w = img.shape[0], img.shape[1] |
35 | | - |
36 | | - if is_train: |
37 | | - # random scale |
38 | | - scale = random.random() + 0.5 # 0.5-1.5 |
39 | | - scale = max(scale, 1. * cropSize / (min(h, w) - 1)) |
| 71 | + def __getitem__(self, index): |
| 72 | + # get sub-batch candidates |
| 73 | + batch_records = self._get_sub_batch() |
| 74 | + |
| 75 | + # resize all images' short edges to the chosen size |
| 76 | + if isinstance(self.imgSize, list): |
| 77 | + this_short_size = np.random.choice(self.imgSize) |
40 | 78 | else: |
41 | | - # scale to crop size |
42 | | - scale = 1. * cropSize / (min(h, w) - 1) |
| 79 | + this_short_size = self.imgSize |
| 80 | + |
| 81 | + # calculate the BATCH's height and width |
| 82 | + # since we concatenate more than one sample, the batch's h and w must be at least as large as EACH sample's |
| 83 | + batch_resized_size = np.zeros((self.batch_per_gpu, 2), np.int32) |
| 84 | + for i in range(self.batch_per_gpu): |
| 85 | + img_height, img_width = batch_records[i]['height'], batch_records[i]['width'] |
| 86 | + this_scale = min(this_short_size / float(min(img_height, img_width)), |
| 87 | + self.imgMaxSize / float(max(img_height, img_width))) |
| 88 | + img_resized_height, img_resized_width = img_height * this_scale, img_width * this_scale |
| 89 | + batch_resized_size[i, :] = img_resized_height, img_resized_width |
| 90 | + batch_resized_height = np.max(batch_resized_size[:, 0]) |
| 91 | + batch_resized_width = np.max(batch_resized_size[:, 1]) |
| 92 | + |
| 93 | + # Here we must pad both input image and segmentation map to size h' and w' so that p | h' and p | w' |
| 94 | + batch_resized_height = int(round2nearest_multiple(batch_resized_height, self.padding_constant)) |
| 95 | + batch_resized_width = int(round2nearest_multiple(batch_resized_width, self.padding_constant)) |
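| | + # e.g. with padding_constant = 8, a 713 x 937 batch is padded up to 720 x 944 |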
| 96 | + |
| 97 | + assert self.padding_constant >= self.segm_downsampling_rate, \ |
| 98 | + 'padding constant must be equal to or larger than segm downsampling rate' |
| 99 | + batch_images = torch.zeros(self.batch_per_gpu, 3, batch_resized_height, batch_resized_width) |
| 100 | + batch_segms = torch.zeros(self.batch_per_gpu, batch_resized_height // self.segm_downsampling_rate, \ |
| 101 | + batch_resized_width // self.segm_downsampling_rate).long() |
| 102 | + |
| 103 | + for i in range(self.batch_per_gpu): |
| 104 | + this_record = batch_records[i] |
| 105 | + |
| 106 | + # load image and label |
| 107 | + image_path = os.path.join(self.root_dataset, this_record['fpath_img']) |
| 108 | + segm_path = os.path.join(self.root_dataset, this_record['fpath_segm']) |
| 109 | + img = imread(image_path, mode='RGB') |
| 110 | + segm = imread(segm_path) |
43 | 111 |
|
44 | | - img_scale = imresize(img, scale, interp='bilinear') |
45 | | - seg_scale = imresize(seg, scale, interp='nearest') |
| 112 | + assert(img.ndim == 3) |
| 113 | + assert(segm.ndim == 2) |
| 114 | + assert(img.shape[0] == segm.shape[0]) |
| 115 | + assert(img.shape[1] == segm.shape[1]) |
| 116 | + |
| 117 | + if self.random_flip: |
| 118 | + random_flip = np.random.choice([0, 1]) |
| 119 | + if random_flip == 1: |
| 120 | + img = cv2.flip(img, 1) |
| 121 | + segm = cv2.flip(segm, 1) |
| 122 | + |
| 123 | + # note that each sample within a mini-batch may have a different scale param |
| 124 | + img = imresize(img, (batch_resized_size[i, 0], batch_resized_size[i, 1]), interp='bilinear') |
| 125 | + segm = imresize(segm, (batch_resized_size[i, 0], batch_resized_size[i, 1]), interp='nearest') |
| 126 | + |
| 127 | + # to avoid seg label misalignment |
| 128 | + segm_rounded_height = round2nearest_multiple(segm.shape[0], self.segm_downsampling_rate) |
| 129 | + segm_rounded_width = round2nearest_multiple(segm.shape[1], self.segm_downsampling_rate) |
| 130 | + segm_rounded = np.zeros((segm_rounded_height, segm_rounded_width), dtype='uint8') |
| 131 | + segm_rounded[:segm.shape[0], :segm.shape[1]] = segm |
| 132 | + |
| 133 | + segm = imresize(segm_rounded, (segm_rounded.shape[0] // self.segm_downsampling_rate, \ |
| 134 | + segm_rounded.shape[1] // self.segm_downsampling_rate), \ |
| 135 | + interp='nearest') |
| 136 | + # image to float |
| 137 | + img = img.astype(np.float32)[:, :, ::-1] # RGB to BGR!!! |
| 138 | + img = img.transpose((2, 0, 1)) |
| 139 | + img = self.img_transform(torch.from_numpy(img.copy())) |
| 140 | + |
| 141 | + batch_images[i][:, :img.shape[1], :img.shape[2]] = img |
| 142 | + batch_segms[i][:segm.shape[0], :segm.shape[1]] = torch.from_numpy(segm.astype(np.int)).long() |
| 143 | + |
| 144 | + batch_segms = batch_segms - 1 # label from -1 to 149 |
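| | + # (assumption: label 0 marks "unlabeled" pixels, so the shift maps it to -1 for the loss to ignore) |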
| 145 | + output = dict() |
| 146 | + output['img_data'] = batch_images |
| 147 | + output['seg_label'] = batch_segms |
| 148 | + return output |
46 | 149 |
|
47 | | - h_s, w_s = img_scale.shape[0], img_scale.shape[1] |
48 | | - if is_train: |
49 | | - # random crop |
50 | | - x1 = random.randint(0, w_s - cropSize) |
51 | | - y1 = random.randint(0, h_s - cropSize) |
52 | | - else: |
53 | | - # center crop |
54 | | - x1 = (w_s - cropSize) // 2 |
55 | | - y1 = (h_s - cropSize) // 2 |
| 150 | + def __len__(self): |
| 151 | + return int(1e6) # a fake length: the trick is that every loader maintains its own shuffled sample list |
| 152 | + # return self.num_sample |
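| | +# A minimal usage sketch (hypothetical: `opt` and the .odgt path below are placeholders). |
| | +# Each __getitem__ call already returns a packed sub-batch of `batch_per_gpu` samples, |
| | +# so the surrounding DataLoader would run with batch_size=1: |
| | +# dataset_train = TrainDataset('./data/train.odgt', opt, batch_per_gpu=2) |
| | +# loader_train = torchdata.DataLoader(dataset_train, batch_size=1, shuffle=False, num_workers=1) |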
56 | 153 |
|
57 | | - img_crop = img_scale[y1: y1 + cropSize, x1: x1 + cropSize, :] |
58 | | - seg_crop = seg_scale[y1: y1 + cropSize, x1: x1 + cropSize] |
59 | | - return img_crop, seg_crop |
60 | 154 |
|
61 | | - def _flip(self, img, seg): |
62 | | - img_flip = img[:, ::-1, :] |
63 | | - seg_flip = seg[:, ::-1] |
64 | | - return img_flip, seg_flip |
| 155 | +class ValDataset(torchdata.Dataset): |
| 156 | + def __init__(self, odgt, opt, max_sample=-1): |
| 157 | + self.root_dataset = opt.root_dataset |
| 158 | + self.imgSize = opt.imgSize |
| 159 | + self.imgMaxSize = opt.imgMaxSize |
| 160 | + # max downsampling rate of the network, to avoid rounding during conv or pooling |
| 161 | + self.padding_constant = opt.padding_constant |
| 162 | + # downsampling rate of the segm label |
| 163 | + self.segm_downsampling_rate = opt.segm_downsampling_rate |
65 | 164 |
|
66 | | - def __getitem__(self, index): |
67 | | - img_basename = self.list_sample[index] |
68 | | - path_img = os.path.join(self.root_img, img_basename) |
69 | | - path_seg = os.path.join(self.root_seg, |
70 | | - img_basename.replace('.jpg', '.png')) |
| 165 | + # mean and std |
| 166 | + self.img_transform = transforms.Compose([ |
| 167 | + transforms.Normalize(mean=[102.9801, 115.9465, 122.7717], std=[1., 1., 1.]) |
| 168 | + ]) |
71 | 169 |
|
72 | | - assert os.path.exists(path_img), '[{}] does not exist'.format(path_img) |
73 | | - assert os.path.exists(path_seg), '[{}] does not exist'.format(path_seg) |
| 170 | + self.list_sample = [json.loads(x.rstrip()) for x in open(odgt, 'r')] |
74 | 171 |
|
75 | | - # load image and label |
76 | | - try: |
77 | | - img = imread(path_img, mode='RGB') |
78 | | - seg = imread(path_seg) |
79 | | - assert(img.ndim == 3) |
80 | | - assert(seg.ndim == 2) |
81 | | - assert(img.shape[0] == seg.shape[0]) |
82 | | - assert(img.shape[1] == seg.shape[1]) |
| 172 | + if max_sample > 0: |
| 173 | + self.list_sample = self.list_sample[0:max_sample] |
| 174 | + self.num_sample = len(self.list_sample) |
| 175 | + assert self.num_sample > 0 |
| 176 | + print('# samples: {}'.format(self.num_sample)) |
83 | 177 |
|
84 | | - # random scale, crop, flip |
85 | | - if self.imgSize > 0: |
86 | | - img, seg = self._scale_and_crop(img, seg, |
87 | | - self.imgSize, self.is_train) |
88 | | - if random.choice([-1, 1]) > 0: |
89 | | - img, seg = self._flip(img, seg) |
90 | 178 |
|
| 179 | + def __getitem__(self, index): |
| 180 | + this_record = self.list_sample[index] |
| 181 | + # load image and label |
| 182 | + image_path = os.path.join(self.root_dataset, this_record['fpath_img']) |
| 183 | + segm_path = os.path.join(self.root_dataset, this_record['fpath_segm']) |
| 184 | + img = imread(image_path, mode='RGB') |
| 185 | + img = img[:, :, ::-1] # RGB to BGR!!! |
| 186 | + segm = imread(segm_path) |
| 187 | + |
| 188 | + ori_height, ori_width, _ = img.shape |
| 189 | + |
| 190 | + img_resized_list = [] |
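| | + # build a multi-scale pyramid: one resized copy of the image per short-edge size in self.imgSize |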
| 191 | + for this_short_size in self.imgSize: |
| 192 | + # calculate target height and width |
| 193 | + scale = min(this_short_size / float(min(ori_height, ori_width)), |
| 194 | + self.imgMaxSize / float(max(ori_height, ori_width))) |
| 195 | + target_height, target_width = int(ori_height * scale), int(ori_width * scale) |
| 196 | + |
| 197 | + # to avoid rounding in network |
| 198 | + target_height = round2nearest_multiple(target_height, self.padding_constant) |
| 199 | + target_width = round2nearest_multiple(target_width, self.padding_constant) |
| 200 | + |
| 201 | + # resize |
| 202 | + img_resized = cv2.resize(img.copy(), (target_width, target_height)) |
| 203 | + |
91 | 204 | # image to float |
92 | | - img = img.astype(np.float32) / 255. |
93 | | - img = img.transpose((2, 0, 1)) |
94 | | - |
95 | | - if self.segSize > 0: |
96 | | - seg = imresize(seg, (self.segSize, self.segSize), |
97 | | - interp='nearest') |
98 | | - |
99 | | - # label to int from -1 to 149 |
100 | | - seg = seg.astype(np.int) - 1 |
101 | | - |
102 | | - # to torch tensor |
103 | | - image = torch.from_numpy(img) |
104 | | - segmentation = torch.from_numpy(seg) |
105 | | - except Exception as e: |
106 | | - print('Failed loading image/segmentation [{}]: {}' |
107 | | - .format(path_img, e)) |
108 | | - # dummy data |
109 | | - image = torch.zeros(3, self.imgSize, self.imgSize) |
110 | | - segmentation = -1 * torch.ones(self.segSize, self.segSize).long() |
111 | | - return image, segmentation, img_basename |
112 | | - |
113 | | - # substracted by mean and divided by std |
114 | | - image = self.img_transform(image) |
115 | | - |
116 | | - return image, segmentation, img_basename |
| 205 | + img_resized = img_resized.astype(np.float32) |
| 206 | + img_resized = img_resized.transpose((2, 0, 1)) |
| 207 | + img_resized = self.img_transform(torch.from_numpy(img_resized)) |
| 208 | + |
| 209 | + img_resized = torch.unsqueeze(img_resized, 0) |
| 210 | + img_resized_list.append(img_resized) |
| 211 | + |
| 212 | + segm = torch.from_numpy(segm.astype(np.int)).long() |
| 213 | + |
| 214 | + batch_segms = torch.unsqueeze(segm, 0) |
| 215 | + |
| 216 | + batch_segms = batch_segms - 1 # label from -1 to 149 |
| 217 | + output = dict() |
| 218 | + output['img_ori'] = img.copy() |
| 219 | + output['img_data'] = [x.contiguous() for x in img_resized_list] |
| 220 | + output['seg_label'] = batch_segms.contiguous() |
| 221 | + output['info'] = this_record['fpath_img'] |
| 222 | + return output |
117 | 223 |
|
118 | 224 | def __len__(self): |
119 | | - return len(self.list_sample) |
| 225 | + return self.num_sample |
| 226 | + |