replace cv2 with PIL

hangzhaomit · hangzhaomit · commit 71c559e66eaf · 2019-08-03T00:18:23.000-04:00
diff --git a/dataset.py b/dataset.py
@@ -1,25 +1,22 @@
 import os
 import json
 import torch
-import cv2
 from torchvision import transforms
 import numpy as np
-import PIL
+from PIL import Image
 
 
 def imresize(im, size, interp='bilinear'):
     if interp == 'nearest':
-        resample = PIL.Image.NEAREST
+        resample = Image.NEAREST
     elif interp == 'bilinear':
-        resample = PIL.Image.BILINEAR
+        resample = Image.BILINEAR
     elif interp == 'bicubic':
-        resample = PIL.Image.BICUBIC
+        resample = Image.BICUBIC
     else:
         raise Exception('resample method undefined!')
 
-    return np.array(
-        PIL.Image.fromarray(im).resize((size[1], size[0]), resample)
-    )
+    return im.resize(size, resample)
 
 
 class BaseDataset(torch.utils.data.Dataset):
@@ -35,7 +32,7 @@ def __init__(self, odgt, opt, **kwargs):
 
         # mean and std
         self.normalize = transforms.Normalize(
-            mean=[102.9801, 115.9465, 122.7717],
+            mean=[122.7717/255., 115.9465/255., 102.9801/255.],
             std=[1., 1., 1.])
 
     def parse_input_list(self, odgt, max_sample=-1, start_idx=-1, end_idx=-1):
@@ -54,12 +51,17 @@ def parse_input_list(self, odgt, max_sample=-1, start_idx=-1, end_idx=-1):
         print('# samples: {}'.format(self.num_sample))
 
     def img_transform(self, img):
-        # image to float
-        img = img.astype(np.float32)
+        # convert to a float32 array and rescale pixel values from 0-255 to 0-1
+        img = np.float32(np.array(img)) / 255.
         img = img.transpose((2, 0, 1))
         img = self.normalize(torch.from_numpy(img.copy()))
         return img
 
+    def segm_transform(self, segm):
+        # to long tensor; subtract 1 so labels run from -1 to 149
+        segm = torch.from_numpy(np.array(segm)).long() - 1
+        return segm
+
     # Round x to the nearest multiple of p and x' >= x
     def round2nearest_multiple(self, x, p):
         return ((x - 1) // p + 1) * p
@@ -69,7 +71,6 @@ class TrainDataset(BaseDataset):
     def __init__(self, root_dataset, odgt, opt, batch_per_gpu=1, **kwargs):
         super(TrainDataset, self).__init__(odgt, opt, **kwargs)
         self.root_dataset = root_dataset
-        self.random_flip = opt.random_flip
         # down sampling rate of segm labe
         self.segm_downsampling_rate = opt.segm_downsampling_rate
         self.batch_per_gpu = batch_per_gpu
@@ -124,71 +125,74 @@ def __getitem__(self, index):
 
         # calculate the BATCH's height and width
         # since we concat more than one samples, the batch's h and w shall be larger than EACH sample
-        batch_resized_size = np.zeros((self.batch_per_gpu, 2), np.int32)
+        batch_widths = np.zeros(self.batch_per_gpu, np.int32)
+        batch_heights = np.zeros(self.batch_per_gpu, np.int32)
         for i in range(self.batch_per_gpu):
             img_height, img_width = batch_records[i]['height'], batch_records[i]['width']
             this_scale = min(
                 this_short_size / min(img_height, img_width), \
                 self.imgMaxSize / max(img_height, img_width))
-            img_resized_height, img_resized_width = img_height * this_scale, img_width * this_scale
-            batch_resized_size[i, :] = img_resized_height, img_resized_width
-        batch_resized_height = np.max(batch_resized_size[:, 0])
-        batch_resized_width = np.max(batch_resized_size[:, 1])
+            batch_widths[i] = img_width * this_scale
+            batch_heights[i] = img_height * this_scale
 
         # Here we must pad both input image and segmentation map to size h' and w' so that p | h' and p | w'
-        batch_resized_height = int(self.round2nearest_multiple(batch_resized_height, self.padding_constant))
-        batch_resized_width = int(self.round2nearest_multiple(batch_resized_width, self.padding_constant))
-
-        assert self.padding_constant >= self.segm_downsampling_rate,\
-                'padding constant must be equal or large than segm downsamping rate'
-        batch_images = torch.zeros(self.batch_per_gpu, 3, batch_resized_height, batch_resized_width)
+        batch_width = np.max(batch_widths)
+        batch_height = np.max(batch_heights)
+        batch_width = int(self.round2nearest_multiple(batch_width, self.padding_constant))
+        batch_height = int(self.round2nearest_multiple(batch_height, self.padding_constant))
+
+        assert self.padding_constant >= self.segm_downsampling_rate, \
+            'padding constant must be equal or large than segm downsamping rate'
+        batch_images = torch.zeros(
+            self.batch_per_gpu, 3, batch_height, batch_width)
         batch_segms = torch.zeros(
-            self.batch_per_gpu, batch_resized_height // self.segm_downsampling_rate, \
-            batch_resized_width // self.segm_downsampling_rate).long()
+            self.batch_per_gpu,
+            batch_height // self.segm_downsampling_rate,
+            batch_width // self.segm_downsampling_rate).long()
 
         for i in range(self.batch_per_gpu):
             this_record = batch_records[i]
 
             # load image and label
             image_path = os.path.join(self.root_dataset, this_record['fpath_img'])
             segm_path = os.path.join(self.root_dataset, this_record['fpath_segm'])
-            img = cv2.imread(image_path, cv2.IMREAD_COLOR)
-            segm = cv2.imread(segm_path, cv2.IMREAD_GRAYSCALE)
 
-            assert(img.ndim == 3)
-            assert(segm.ndim == 2)
-            assert(img.shape[0] == segm.shape[0])
-            assert(img.shape[1] == segm.shape[1])
+            img = Image.open(image_path).convert('RGB')
+            segm = Image.open(segm_path)
+            assert(segm.mode == "L")
+            assert(img.size[0] == segm.size[0])
+            assert(img.size[1] == segm.size[1])
 
-            if self.random_flip is True:
-                random_flip = np.random.choice([0, 1])
-                if random_flip == 1:
-                    img = cv2.flip(img, 1)
-                    segm = cv2.flip(segm, 1)
+            # random horizontal flip (50% chance), applied to image and label alike
+            if np.random.choice([0, 1]):
+                img = img.transpose(Image.FLIP_LEFT_RIGHT)
+                segm = segm.transpose(Image.FLIP_LEFT_RIGHT)
 
             # note that each sample within a mini batch has different scale param
-            img = imresize(img, (batch_resized_size[i, 0], batch_resized_size[i, 1]), interp='bilinear')
-            segm = imresize(segm, (batch_resized_size[i, 0], batch_resized_size[i, 1]), interp='nearest')
-
-            # to avoid seg label misalignment
-            segm_rounded_height = self.round2nearest_multiple(segm.shape[0], self.segm_downsampling_rate)
-            segm_rounded_width = self.round2nearest_multiple(segm.shape[1], self.segm_downsampling_rate)
-            segm_rounded = np.zeros((segm_rounded_height, segm_rounded_width), dtype='uint8')
-            segm_rounded[:segm.shape[0], :segm.shape[1]] = segm
-
+            img = imresize(img, (batch_widths[i], batch_heights[i]), interp='bilinear')
+            segm = imresize(segm, (batch_widths[i], batch_heights[i]), interp='nearest')
+
+            # pad seg label to a multiple of the downsampling rate, then downsample (avoids label misalignment)
+            segm_rounded_width = self.round2nearest_multiple(segm.size[0], self.segm_downsampling_rate)
+            segm_rounded_height = self.round2nearest_multiple(segm.size[1], self.segm_downsampling_rate)
+            segm_rounded = Image.new('L', (segm_rounded_width, segm_rounded_height), 0)
+            segm_rounded.paste(segm, (0, 0))
             segm = imresize(
                 segm_rounded,
-                (segm_rounded.shape[0] // self.segm_downsampling_rate, \
-                 segm_rounded.shape[1] // self.segm_downsampling_rate), \
+                (segm_rounded.size[0] // self.segm_downsampling_rate, \
+                 segm_rounded.size[1] // self.segm_downsampling_rate), \
                 interp='nearest')
 
-            # image transform
+            # image transform, to torch float tensor 3xHxW
             img = self.img_transform(img)
 
+            # segm transform, to torch long tensor HxW
+            segm = self.segm_transform(segm)
+
+            # put into batch arrays
             batch_images[i][:, :img.shape[1], :img.shape[2]] = img
-            batch_segms[i][:segm.shape[0], :segm.shape[1]] = torch.from_numpy(segm.astype(np.int)).long()
+            batch_segms[i][:segm.shape[0], :segm.shape[1]] = segm
 
-        batch_segms = batch_segms - 1 # label from -1 to 149
         output = dict()
         output['img_data'] = batch_images
         output['seg_label'] = batch_segms
@@ -209,10 +213,13 @@ def __getitem__(self, index):
         # load image and label
         image_path = os.path.join(self.root_dataset, this_record['fpath_img'])
         segm_path = os.path.join(self.root_dataset, this_record['fpath_segm'])
-        img = cv2.imread(image_path, cv2.IMREAD_COLOR)
-        segm = cv2.imread(segm_path, cv2.IMREAD_GRAYSCALE)
+        img = Image.open(image_path).convert('RGB')
+        segm = Image.open(segm_path)
+        assert(segm.mode == "L")
+        assert(img.size[0] == segm.size[0])
+        assert(img.size[1] == segm.size[1])
 
-        ori_height, ori_width, _ = img.shape
+        ori_width, ori_height = img.size
 
         img_resized_list = []
         for this_short_size in self.imgSizes:
@@ -222,24 +229,23 @@ def __getitem__(self, index):
             target_height, target_width = int(ori_height * scale), int(ori_width * scale)
 
             # to avoid rounding in network
-            target_height = self.round2nearest_multiple(target_height, self.padding_constant)
             target_width = self.round2nearest_multiple(target_width, self.padding_constant)
+            target_height = self.round2nearest_multiple(target_height, self.padding_constant)
 
-            # resize
-            img_resized = cv2.resize(img.copy(), (target_width, target_height))
+            # resize images
+            img_resized = imresize(img, (target_width, target_height), interp='bilinear')
 
-            # image transform
+            # image transform, to torch float tensor 3xHxW
             img_resized = self.img_transform(img_resized)
-
             img_resized = torch.unsqueeze(img_resized, 0)
             img_resized_list.append(img_resized)
 
-        segm = torch.from_numpy(segm.astype(np.int)).long()
+        # segm transform, to torch long tensor HxW
+        segm = self.segm_transform(segm)
         batch_segms = torch.unsqueeze(segm, 0)
 
-        batch_segms = batch_segms - 1 # label from -1 to 149
         output = dict()
-        output['img_ori'] = img.copy()
+        output['img_ori'] = np.array(img)
         output['img_data'] = [x.contiguous() for x in img_resized_list]
         output['seg_label'] = batch_segms.contiguous()
         output['info'] = this_record['fpath_img']
@@ -255,11 +261,11 @@ def __init__(self, odgt, opt, **kwargs):
 
     def __getitem__(self, index):
         this_record = self.list_sample[index]
-        # load image and label
+        # load image
         image_path = this_record['fpath_img']
-        img = cv2.imread(image_path, cv2.IMREAD_COLOR)
+        img = Image.open(image_path).convert('RGB')
 
-        ori_height, ori_width, _ = img.shape
+        ori_width, ori_height = img.size
 
         img_resized_list = []
         for this_short_size in self.imgSizes:
@@ -269,19 +275,19 @@ def __getitem__(self, index):
             target_height, target_width = int(ori_height * scale), int(ori_width * scale)
 
             # to avoid rounding in network
-            target_height = self.round2nearest_multiple(target_height, self.padding_constant)
             target_width = self.round2nearest_multiple(target_width, self.padding_constant)
+            target_height = self.round2nearest_multiple(target_height, self.padding_constant)
 
-            # resize
-            img_resized = cv2.resize(img.copy(), (target_width, target_height))
+            # resize images
+            img_resized = imresize(img, (target_width, target_height), interp='bilinear')
 
-            # image transform
+            # image transform, to torch float tensor 3xHxW
             img_resized = self.img_transform(img_resized)
             img_resized = torch.unsqueeze(img_resized, 0)
             img_resized_list.append(img_resized)
 
         output = dict()
-        output['img_ori'] = img.copy()
+        output['img_ori'] = np.array(img)
         output['img_data'] = [x.contiguous() for x in img_resized_list]
         output['info'] = this_record['fpath_img']
         return output
diff --git a/eval.py b/eval.py
@@ -15,7 +15,7 @@
 from utils import AverageMeter, colorEncode, accuracy, intersectionAndUnion, setup_logger
 from lib.nn import user_scattered_collate, async_copy_to
 from lib.utils import as_numpy
-import cv2
+from PIL import Image
 from tqdm import tqdm
 
 colors = loadmat('data/color150.mat')['colors']
@@ -35,10 +35,7 @@ def visualize_result(data, pred, dir_result):
                             axis=1).astype(np.uint8)
 
     img_name = info.split('/')[-1]
-    cv2.imwrite(
-        os.path.join(dir_result, img_name.replace('.jpg', '.png')),
-        im_vis
-    )
+    Image.fromarray(im_vis).save(os.path.join(dir_result, img_name.replace('.jpg', '.png')))
 
 
 def evaluate(segmentation_module, loader, cfg, gpu):
diff --git a/eval_multipro.py b/eval_multipro.py
@@ -16,7 +16,7 @@
 from utils import AverageMeter, colorEncode, accuracy, intersectionAndUnion, parse_devices, setup_logger
 from lib.nn import user_scattered_collate, async_copy_to
 from lib.utils import as_numpy
-import cv2
+from PIL import Image
 from tqdm import tqdm
 
 colors = loadmat('data/color150.mat')['colors']
@@ -36,10 +36,7 @@ def visualize_result(data, pred, dir_result):
                             axis=1).astype(np.uint8)
 
     img_name = info.split('/')[-1]
-    cv2.imwrite(
-        os.path.join(dir_result, img_name.replace('.jpg', '.png')),
-        im_vis
-    )
+    Image.fromarray(im_vis).save(os.path.join(dir_result, img_name.replace('.jpg', '.png')))
 
 
 def evaluate(segmentation_module, loader, cfg, gpu_id, result_queue):
@@ -112,6 +109,8 @@ def worker(cfg, gpu_id, start_idx, end_idx, result_queue):
         weights=cfg.MODEL.weights_decoder,
         use_softmax=True)
 
+    net_encoder.features[0][0].weight.data = net_encoder.features[0][0].weight.data[:, (2,1,0), :, :] * 255.
+
     crit = nn.NLLLoss(ignore_index=-1)
 
     segmentation_module = SegmentationModule(net_encoder, net_decoder, crit)
diff --git a/test.py b/test.py
@@ -14,7 +14,7 @@
 from utils import colorEncode, find_recursive, setup_logger
 from lib.nn import user_scattered_collate, async_copy_to
 from lib.utils import as_numpy
-import cv2
+from PIL import Image
 from tqdm import tqdm
 from config import cfg
 
@@ -48,8 +48,8 @@ def visualize_result(data, pred, cfg):
     im_vis = np.concatenate((img, pred_color), axis=1)
 
     img_name = info.split('/')[-1]
-    cv2.imwrite(os.path.join(cfg.TEST.result,
-                img_name.replace('.jpg', '.png')), im_vis)
+    Image.fromarray(im_vis).save(
+        os.path.join(cfg.TEST.result, img_name.replace('.jpg', '.png')))
 
 
 def test(segmentation_module, loader, gpu):