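1. Fix batch_size to 1: remove the batch_size argument from the network constructors, since only single-image batches are supported.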

ruotianluo · ruotianluo · commit 88c6583833b5 · 2017-10-19T16:49:09.000-05:00
2. Specify the RPN channel size in cfg (new RPN_CHANNELS option, default 512).

Merge commit 'b2a31d9923e5a459fdb6d8c9552290e6d8939600'

* commit 'b2a31d9923e5a459fdb6d8c9552290e6d8939600':
  Update config.py
  remove batch size.
  Update minibatch.py
  remove batch size.

# Conflicts:
#	lib/model/test.py
#	lib/nets/network.py
#	lib/nets/resnet_v1.py
#	lib/nets/vgg16.py
#	tools/convert_from_depre.py
diff --git a/lib/layer_utils/anchor_target_layer.py b/lib/layer_utils/anchor_target_layer.py
@@ -21,7 +21,6 @@ def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride, all_anch
   A = num_anchors
   total_anchors = all_anchors.shape[0]
   K = total_anchors / num_anchors
-  im_info = im_info[0]
 
   # allow boxes to sit over the edge by a small amount
   _allowed_border = 0
diff --git a/lib/layer_utils/proposal_layer.py b/lib/layer_utils/proposal_layer.py
@@ -26,7 +26,6 @@ def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride,
   post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
   nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
 
-  im_info = im_info[0]
   # Get the scores and bounding boxes
   scores = rpn_cls_prob[:, :, :, num_anchors:]
   rpn_bbox_pred = rpn_bbox_pred.view((-1, 4))
diff --git a/lib/layer_utils/proposal_top_layer.py b/lib/layer_utils/proposal_top_layer.py
@@ -20,7 +20,6 @@ def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, ancho
      For details please see the technical report
   """
   rpn_top_n = cfg.TEST.RPN_TOP_N
-  im_info = im_info[0]
 
   scores = rpn_cls_prob[:, :, :, num_anchors:]
 
diff --git a/lib/model/config.py b/lib/model/config.py
@@ -281,6 +281,9 @@
 # Anchor ratios for RPN
 __C.ANCHOR_RATIOS = [0.5,1,2]
 
+# Number of filters for the RPN layer
+__C.RPN_CHANNELS = 512
+
 
 def get_output_dir(imdb, weights_filename):
   """Return the directory where experimental artifacts are placed.
diff --git a/lib/model/test.py b/lib/model/test.py
@@ -90,7 +90,7 @@ def im_detect(net, im):
   assert len(im_scales) == 1, "Only single-image batch implemented"
 
   im_blob = blobs['data']
-  blobs['im_info'] = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32)
+  blobs['im_info'] = np.array([im_blob.shape[1], im_blob.shape[2], im_scales[0]], dtype=np.float32)
 
   _, scores, bbox_pred, rois = net.test_image(blobs['data'], blobs['im_info'])
   
diff --git a/lib/model/train_val.py b/lib/model/train_val.py
@@ -249,7 +249,7 @@ def train_model(self, max_iters):
       blobs = self.data_layer.forward()
 
       now = time.time()
-      if now - last_summary_time > cfg.TRAIN.SUMMARY_INTERVAL:
+      if iter == 1 or now - last_summary_time > cfg.TRAIN.SUMMARY_INTERVAL:
         # Compute the graph with summary
         rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, total_loss, summary = \
           self.net.train_step_with_summary(blobs, self.optimizer)
diff --git a/lib/nets/mobilenet_v1.py b/lib/nets/mobilenet_v1.py
@@ -173,9 +173,7 @@ def mobilenet_v1_base(inputs,
 
 # Modified arg_scope to incorporate configs
 def mobilenet_v1_arg_scope(is_training=True,
-                           weight_decay=cfg.MOBILENET.WEIGHT_DECAY,
-                           stddev=0.09,
-                           regularize_depthwise=cfg.MOBILENET.REGU_DEPTH):
+                           stddev=0.09):
   batch_norm_params = {
       'is_training': False,
       'center': True,
@@ -187,8 +185,8 @@ def mobilenet_v1_arg_scope(is_training=True,
 
   # Set weight_decay for weights in Conv and DepthSepConv layers.
   weights_init = tf.truncated_normal_initializer(stddev=stddev)
-  regularizer = tf.contrib.layers.l2_regularizer(weight_decay)
-  if regularize_depthwise:
+  regularizer = tf.contrib.layers.l2_regularizer(cfg.MOBILENET.WEIGHT_DECAY)
+  if cfg.MOBILENET.REGU_DEPTH:
     depthwise_regularizer = regularizer
   else:
     depthwise_regularizer = None
@@ -206,8 +204,10 @@ def mobilenet_v1_arg_scope(is_training=True,
           return sc
 
 class mobilenetv1(Network):
-  def __init__(self, batch_size=1):
-    Network.__init__(self, batch_size=batch_size)
+  def __init__(self):
+    Network.__init__(self)
+    self._feat_stride = [16, ]
+    self._feat_compress = [1. / float(self._feat_stride[0]), ]
     self._depth_multiplier = cfg.MOBILENET.DEPTH_MULTIPLIER
     self._scope = 'MobilenetV1'
 
diff --git a/lib/nets/network.py b/lib/nets/network.py
@@ -29,12 +29,11 @@
 
 import tensorboardX as tb
 
+from scipy.misc import imresize
+
 class Network(nn.Module):
-  def __init__(self, batch_size=1):
+  def __init__(self):
     nn.Module.__init__(self)
-    self._feat_stride = [16, ]
-    self._feat_compress = [1. / 16., ]
-    self._batch_size = batch_size
     self._predictions = {}
     self._losses = {}
     self._anchor_targets = {}
@@ -50,8 +49,9 @@ def __init__(self, batch_size=1):
   def _add_gt_image(self):
     # add back mean
     image = self._image_gt_summaries['image'] + cfg.PIXEL_MEANS
+    resized = imresize(image, self._im_info[:2] / self._im_info[2])
     # BGR to RGB (opencv uses BGR)
-    self._gt_image = image[:,:,:,::-1].copy(order='C')
+    self._gt_image = image[:,:,::-1].copy(order='C')
 
   def _add_gt_image_summary(self):
     # use a customized visualization function to visualize the boxes
diff --git a/lib/nets/resnet_v1.py b/lib/nets/resnet_v1.py
@@ -206,8 +206,10 @@ def resnet152(pretrained=False):
   return model
 
 class resnetv1(Network):
-  def __init__(self, batch_size=1, num_layers=50):
-    Network.__init__(self, batch_size=batch_size)
+  def __init__(self, num_layers=50):
+    Network.__init__(self)
+    self._feat_stride = [16, ]
+    self._feat_compress = [1. / float(self._feat_stride[0]), ]
     self._num_layers = num_layers
 
   def _crop_pool_layer(self, bottom, rois):
diff --git a/lib/nets/vgg16.py b/lib/nets/vgg16.py
@@ -18,8 +18,10 @@
 import torchvision.models as models
 
 class vgg16(Network):
-  def __init__(self, batch_size=1):
-    Network.__init__(self, batch_size=batch_size)
+  def __init__(self):
+    Network.__init__(self)
+    self._feat_stride = [16, ]
+    self._feat_compress = [1. / float(self._feat_stride[0]), ]
 
   def _init_modules(self):
     self.vgg = models.vgg16()
diff --git a/lib/roi_data_layer/minibatch.py b/lib/roi_data_layer/minibatch.py
@@ -46,7 +46,7 @@ def get_minibatch(roidb, num_classes):
   gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds]
   blobs['gt_boxes'] = gt_boxes
   blobs['im_info'] = np.array(
-    [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
+    [im_blob.shape[1], im_blob.shape[2], im_scales[0]],
     dtype=np.float32)
 
   return blobs
diff --git a/tools/test_net.py b/tools/test_net.py
@@ -71,7 +71,7 @@ def parse_args():
   pprint.pprint(cfg)
 
   # if has model, get the name from it
-  # if does not, then just use the inialization weights
+  # if does not, then just use the initialization weights
   if args.model:
     filename = os.path.splitext(os.path.basename(args.model))[0]
   else:
@@ -86,15 +86,15 @@ def parse_args():
 
   # load network
   if args.net == 'vgg16':
-    net = vgg16(batch_size=1)
+    net = vgg16()
   elif args.net == 'res50':
-    net = resnetv1(batch_size=1, num_layers=50)
+    net = resnetv1(num_layers=50)
   elif args.net == 'res101':
-    net = resnetv1(batch_size=1, num_layers=101)
+    net = resnetv1(num_layers=101)
   elif args.net == 'res152':
-    net = resnetv1(batch_size=1, num_layers=152)
+    net = resnetv1(num_layers=152)
   elif args.net == 'mobile':
-    net = mobilenetv1(batch_size=1)
+    net = mobilenetv1()
   else:
     raise NotImplementedError
 
diff --git a/tools/trainval_net.py b/tools/trainval_net.py
@@ -121,15 +121,15 @@ def get_roidb(imdb_name):
 
   # load network
   if args.net == 'vgg16':
-    net = vgg16(batch_size=cfg.TRAIN.IMS_PER_BATCH)
+    net = vgg16()
   elif args.net == 'res50':
-    net = resnetv1(batch_size=cfg.TRAIN.IMS_PER_BATCH, num_layers=50)
+    net = resnetv1(num_layers=50)
   elif args.net == 'res101':
-    net = resnetv1(batch_size=cfg.TRAIN.IMS_PER_BATCH, num_layers=101)
+    net = resnetv1(num_layers=101)
   elif args.net == 'res152':
-    net = resnetv1(batch_size=cfg.TRAIN.IMS_PER_BATCH, num_layers=152)
+    net = resnetv1(num_layers=152)
   elif args.net == 'mobile':
-    net = mobilenetv1(batch_size=cfg.TRAIN.IMS_PER_BATCH)
+    net = mobilenetv1()
   else:
     raise NotImplementedError