Support CPU training and testing

zisianw · zisianw · commit f8292fe3fea0 · 2019-03-06T14:50:36.000+08:00
diff --git a/README.md b/README.md
@@ -75,6 +75,8 @@ $FaceBoxes_ROOT/data/FDDB/images/
 ```Shell
 # dataset choices = ['AFW', 'PASCAL', 'FDDB']
 python3 test.py --dataset FDDB
+# evaluate using cpu
+python3 test.py --cpu
 ```
 
 3. Download [eval_tool](https://bitbucket.org/marcopede/face-eval) to evaluate the performance.
diff --git a/data/config.py b/data/config.py
@@ -9,5 +9,6 @@
     'aspect_ratios': [[1], [1], [1]],
     'variance': [0.1, 0.2],
     'clip': False,
-    'loc_weight': 2.0
+    'loc_weight': 2.0,
+    'gpu_train': True
 }
diff --git a/data/data_augment.py b/data/data_augment.py
@@ -4,8 +4,9 @@
 from utils.box_utils import matrix_iof
 
 
-def _crop(image, boxes, labels, img_dim, rgb_means):
+def _crop(image, boxes, labels, img_dim):
     height, width, _ = image.shape
+    pad_image_flag = True
 
     for _ in range(250):
         if random.uniform(0, 1) <= 0.2:
@@ -41,7 +42,7 @@ def _crop(image, boxes, labels, img_dim, rgb_means):
         b_h_t = (boxes_t[:, 3] - boxes_t[:, 1] + 1) / h * img_dim
         mask_b = np.minimum(b_w_t, b_h_t) > 16.0
         boxes_t = boxes_t[mask_b]
-        labels_t = labels_t[mask_b].copy()
+        labels_t = labels_t[mask_b]
 
         if boxes_t.shape[0] == 0:
             continue
@@ -53,14 +54,10 @@ def _crop(image, boxes, labels, img_dim, rgb_means):
         boxes_t[:, 2:] = np.minimum(boxes_t[:, 2:], roi[2:])
         boxes_t[:, 2:] -= roi[:2]
 
-        return image_t, boxes_t, labels_t
+        pad_image_flag = False
 
-    long_side = max(width, height)
-    image_t = np.empty((long_side, long_side, 3), dtype=image.dtype)
-    image_t[:, :] = rgb_means
-    image_t[0:0 + height, 0:0 + width] = image
-
-    return image_t, boxes, labels
+        return image_t, boxes_t, labels_t, pad_image_flag
+    return image, boxes, labels, pad_image_flag
 
 
 def _distort(image):
@@ -85,7 +82,7 @@ def _convert(image, alpha=1, beta=0):
 
         image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
 
-        # saturation distortion
+        #saturation distortion
         if random.randrange(2):
             _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))
 
@@ -105,7 +102,7 @@ def _convert(image, alpha=1, beta=0):
 
         image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
 
-        # saturation distortion
+        #saturation distortion
         if random.randrange(2):
             _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))
 
@@ -159,12 +156,23 @@ def _mirror(image, boxes):
     return image, boxes
 
 
-def preproc_for_test(image, insize, mean):
+def _pad_to_square(image, rgb_mean, pad_image_flag):
+    if not pad_image_flag:
+        return image
+    height, width, _ = image.shape
+    long_side = max(width, height)
+    image_t = np.empty((long_side, long_side, 3), dtype=image.dtype)
+    image_t[:, :] = rgb_mean
+    image_t[0:0 + height, 0:0 + width] = image
+    return image_t
+
+
+def _resize_subtract_mean(image, insize, rgb_mean):
     interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_NEAREST, cv2.INTER_LANCZOS4]
     interp_method = interp_methods[random.randrange(5)]
     image = cv2.resize(image, (insize, insize), interpolation=interp_method)
     image = image.astype(np.float32)
-    image -= mean
+    image -= rgb_mean
     return image.transpose(2, 0, 1)
 
 
@@ -181,12 +189,15 @@ def __call__(self, image, targets):
         boxes = targets[:, :-1].copy()
         labels = targets[:, -1].copy()
 
-        image_t = _distort(image)
-        # image_t, boxes_t = _expand(image_t, boxes, self.cfg['rgb_mean'], self.cfg['max_expand_ratio'])
-        image_t, boxes_t, labels_t = _crop(image_t, boxes, labels, self.img_dim, self.rgb_means)
+        #image_t = _distort(image)
+        #image_t, boxes_t = _expand(image_t, boxes, self.cfg['rgb_mean'], self.cfg['max_expand_ratio'])
+        #image_t, boxes_t, labels_t = _crop(image_t, boxes, labels, self.img_dim, self.rgb_means)
+        image_t, boxes_t, labels_t, pad_image_flag = _crop(image, boxes, labels, self.img_dim)
+        image_t = _distort(image_t)
+        image_t = _pad_to_square(image_t,self.rgb_means, pad_image_flag)
         image_t, boxes_t = _mirror(image_t, boxes_t)
         height, width, _ = image_t.shape
-        image_t = preproc_for_test(image_t, self.img_dim, self.rgb_means)
+        image_t = _resize_subtract_mean(image_t, self.img_dim, self.rgb_means)
         boxes_t[:, 0::2] /= width
         boxes_t[:, 1::2] /= height
 
diff --git a/layers/modules/multibox_loss.py b/layers/modules/multibox_loss.py
@@ -3,10 +3,8 @@
 import torch.nn.functional as F
 from torch.autograd import Variable
 from utils.box_utils import match, log_sum_exp
-GPU = False
-if torch.cuda.is_available():
-    GPU = True
-
+from data import cfg
+GPU = cfg['gpu_train']
 
 class MultiBoxLoss(nn.Module):
     """SSD Weighted Loss Function
diff --git a/models/faceboxes.py b/models/faceboxes.py
@@ -123,7 +123,7 @@ def forward(self, x):
     x = self.inception1(x)
     x = self.inception2(x)
     x = self.inception3(x)
-    detection_dimension += [x.shape[2:]]
+    detection_dimension.append(x.shape[2:])
     sources.append(x)
     x = self.conv3_1(x)
     x = self.conv3_2(x)
@@ -134,8 +134,7 @@ def forward(self, x):
     detection_dimension.append(x.shape[2:])
     sources.append(x)
     
-    detection_dimension = torch.Tensor(detection_dimension)
-    detection_dimension = detection_dimension.cuda()
+    detection_dimension = torch.tensor(detection_dimension, device=x.device)
 
     for (x, l, c) in zip(sources, self.loc, self.conf):
         loc.append(l(x).permute(0, 2, 3, 1).contiguous())
diff --git a/test.py b/test.py
@@ -7,6 +7,7 @@
 from data import cfg
 from layers.functions.prior_box import PriorBox
 from utils.nms_wrapper import nms
+#from utils.nms.py_cpu_nms import py_cpu_nms
 import cv2
 from models.faceboxes import FaceBoxes
 from utils.box_utils import decode
@@ -17,9 +18,8 @@
 parser.add_argument('-m', '--trained_model', default='weights/FaceBoxes.pth',
                     type=str, help='Trained state_dict file path to open')
 parser.add_argument('--save_folder', default='eval/', type=str, help='Dir to save results')
-parser.add_argument('--cuda', default=True, type=bool, help='Use cuda to train model')
-parser.add_argument('--cpu', default=False, type=bool, help='Use cpu nms')
-parser.add_argument('--dataset', default='FDDB', type=str, choices=['AFW', 'PASCAL', 'FDDB'], help='dataset')
+parser.add_argument('--cpu', action="store_true", default=False, help='Use cpu inference')
+parser.add_argument('--dataset', default='PASCAL', type=str, choices=['AFW', 'PASCAL', 'FDDB'], help='dataset')
 parser.add_argument('--confidence_threshold', default=0.05, type=float, help='confidence_threshold')
 parser.add_argument('--top_k', default=5000, type=int, help='top_k')
 parser.add_argument('--nms_threshold', default=0.3, type=float, help='nms_threshold')
@@ -47,10 +47,13 @@ def remove_prefix(state_dict, prefix):
     return {f(key): value for key, value in state_dict.items()}
 
 
-def load_model(model, pretrained_path):
+def load_model(model, pretrained_path, load_to_cpu):
     print('Loading pretrained model from {}'.format(pretrained_path))
-    device = torch.cuda.current_device()
-    pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device))
+    if load_to_cpu:
+        pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage)
+    else:
+        device = torch.cuda.current_device()
+        pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device))
     if "state_dict" in pretrained_dict.keys():
         pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.')
     else:
@@ -61,17 +64,17 @@ def load_model(model, pretrained_path):
 
 
 if __name__ == '__main__':
+    torch.set_grad_enabled(False)
     # net and model
     net = FaceBoxes(phase='test', size=None, num_classes=2)    # initialize detector
-    net = load_model(net, args.trained_model)
+    net = load_model(net, args.trained_model, args.cpu)
     net.eval()
     print('Finished loading model!')
     print(net)
-    if args.cuda:
-        net = net.cuda()
-        cudnn.benchmark = True
-    else:
-        net = net.cpu()
+    cudnn.benchmark = True
+    device = torch.device("cpu" if args.cpu else "cuda")
+    net = net.to(device)
+
 
     # save file
     if not os.path.exists(args.save_folder):
@@ -106,18 +109,16 @@ def load_model(model, pretrained_path):
         img -= (104, 117, 123)
         img = img.transpose(2, 0, 1)
         img = torch.from_numpy(img).unsqueeze(0)
-        if args.cuda:
-            img = img.cuda()
-            scale = scale.cuda()
+        img = img.to(device)
+        scale = scale.to(device)
 
         _t['forward_pass'].tic()
         out = net(img)  # forward pass
         _t['forward_pass'].toc()
         _t['misc'].tic()
         priorbox = PriorBox(cfg, out[2], (im_height, im_width), phase='test')
         priors = priorbox.forward()
-        if args.cuda:
-            priors = priors.cuda()
+        priors = priors.to(device)
         loc, conf, _ = out
         prior_data = priors.data
         boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
@@ -137,7 +138,8 @@ def load_model(model, pretrained_path):
 
         # do NMS
         dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
-        keep = nms(dets, args.nms_threshold, force_cpu=args.cpu)
+        #keep = py_cpu_nms(dets, args.nms_threshold)
+        keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
         dets = dets[keep, :]
 
         # keep top-K faster NMS
diff --git a/train.py b/train.py
@@ -1,6 +1,5 @@
 from __future__ import print_function
 import os
-os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
 import torch
 import torch.optim as optim
 import torch.backends.cudnn as cudnn
@@ -18,7 +17,6 @@
 parser.add_argument('--training_dataset', default='./data/WIDER_FACE', help='Training dataset directory')
 parser.add_argument('-b', '--batch_size', default=32, type=int, help='Batch size for training')
 parser.add_argument('--num_workers', default=8, type=int, help='Number of workers used in dataloading')
-parser.add_argument('--cuda', default=True, type=bool, help='Use cuda to train model')
 parser.add_argument('--ngpu', default=2, type=int, help='gpus')
 parser.add_argument('--lr', '--learning-rate', default=1e-3, type=float, help='initial learning rate')
 parser.add_argument('--momentum', default=0.9, type=float, help='momentum')
@@ -34,12 +32,13 @@
     os.mkdir(args.save_folder)
 
 img_dim = 1024
-rgb_means = (104, 117, 123)
+rgb_means = (104, 117, 123) #bgr order
 num_classes = 2
 batch_size = args.batch_size
 weight_decay = args.weight_decay
 gamma = args.gamma
 momentum = args.momentum
+gpu_train = cfg['gpu_train']
 
 net = FaceBoxes('train', img_dim, num_classes)
 print("Printing net...")
@@ -60,21 +59,20 @@
         new_state_dict[name] = v
     net.load_state_dict(new_state_dict)
 
-if args.ngpu > 1:
+if args.ngpu > 1 and gpu_train:
     net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu)))
 
-if args.cuda:
-    net.cuda()
-    cudnn.benchmark = True
+device = torch.device('cuda:0' if gpu_train else 'cpu')
+cudnn.benchmark = True
+net = net.to(device)
 
 optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
 criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False)
 
 priorbox = PriorBox(cfg)
 with torch.no_grad():
     priors = priorbox.forward()
-    if args.cuda:
-        priors = priors.cuda()
+    priors = priors.to(device)
 
 
 def train():
@@ -110,7 +108,7 @@ def train():
 
         # load train data
         images, targets = next(batch_iterator)
-        if args.cuda:
+        if gpu_train:
             images = Variable(images.cuda())
             targets = [Variable(anno.cuda()) for anno in targets]
         else:

Original file line number	Diff line number	Diff line change
`@@ -9,5 +9,6 @@`
`9`	`9`	`'aspect_ratios': [[1], [1], [1]],`
`10`	`10`	`'variance': [0.1, 0.2],`
`11`	`11`	`'clip': False,`
`12`		`- 'loc_weight': 2.0`
	`12`	`+ 'loc_weight': 2.0,`
	`13`	`+ 'gpu_train': True`
`13`	`14`	`}`