Commit d518ad7

Merge pull request tqchen#12 from ZihengJiang/resnet
support resnet on cifar
2 parents 29b464c + 44dd3e2 commit d518ad7

File tree

9 files changed: +335 −19 lines

example/cifar_resnet.py

Lines changed: 90 additions & 0 deletions
@@ -0,0 +1,90 @@
import tinyflow as tf
from tinyflow.datasets import get_cifar10
import numpy as np

num_epoch = 10
num_batch = 600
batch_size = 100


def conv_factory(x, filter_size, in_filters, out_filters):
    x = tf.nn.conv2d(x, num_filter=out_filters,
                     ksize=[1, filter_size, filter_size, 1], padding='SAME')
    x = tf.nn.batch_normalization(x)
    x = tf.nn.relu(x)
    return x


def residual_factory(x, in_filters, out_filters):
    if in_filters == out_filters:
        # identity shortcut: add the input directly to the residual branch
        orig_x = x
        conv1 = conv_factory(x, 3, in_filters, out_filters)
        conv2 = conv_factory(conv1, 3, out_filters, out_filters)
        new = orig_x + conv2
        return tf.nn.relu(new)
    else:
        # projection shortcut: a 1x1 convolution matches the channel count
        conv1 = conv_factory(x, 3, in_filters, out_filters)
        conv2 = conv_factory(conv1, 3, out_filters, out_filters)
        project_x = conv_factory(x, 1, in_filters, out_filters)
        new = project_x + conv2
        return tf.nn.relu(new)


def resnet(x, n, in_filters, out_filters):
    # three stages of n residual units each, widening 16 -> 32 -> 64 channels
    # (out_filters is unused here; the stage widths are hardcoded below)
    for i in range(n):
        if i == 0:
            x = residual_factory(x, in_filters, 16)
        else:
            x = residual_factory(x, 16, 16)
    for i in range(n):
        if i == 0:
            x = residual_factory(x, 16, 32)
        else:
            x = residual_factory(x, 32, 32)
    for i in range(n):
        if i == 0:
            x = residual_factory(x, 32, 64)
        else:
            x = residual_factory(x, 64, 64)
    return x


x = tf.placeholder(tf.float32)
conv1 = tf.nn.conv2d(x, num_filter=16, ksize=[1, 5, 5, 1], padding='SAME')
tanh1 = tf.tanh(conv1)
res = resnet(tanh1, 1, 16, 64)
pool1 = tf.nn.avg_pool(res, ksize=[1, 4, 4, 1], strides=[1, 2, 2, 1], padding='SAME', data_format='NCHW')
conv2 = tf.nn.conv2d(pool1, num_filter=16, ksize=[1, 5, 5, 1])
flatten = tf.nn.flatten_layer(conv2)
fc1 = tf.nn.linear(flatten, num_hidden=10, name="fc1")

# define loss
label = tf.placeholder(tf.float32)
cross_entropy = tf.nn.mean_sparse_softmax_cross_entropy_with_logits(fc1, label)
train_step = tf.train.AdamOptimizer(0.0005).minimize(cross_entropy)

sess = tf.Session(config='gpu')

# Automatic variable shape inference API: infer the shapes and initialize the weights.
known_shape = {x: [batch_size, 3, 32, 32], label: [batch_size]}
stdev = 0.01
init_step = []
for v, name, shape in tf.infer_variable_shapes(
        cross_entropy, feed_dict=known_shape):
    init_step.append(tf.assign(v, tf.normal(shape, stdev)))
    print("shape[%s]=%s" % (name, str(shape)))
sess.run(init_step)
sess.run(tf.initialize_all_variables())

# get the cifar dataset
cifar = get_cifar10()

for epoch in range(num_epoch):
    sum_loss = 0.0
    for i in range(num_batch):
        batch_xs, batch_ys = cifar.train.next_batch(batch_size)
        loss, _ = sess.run([cross_entropy, train_step], feed_dict={x: batch_xs, label: batch_ys})
        sum_loss += loss
    print("epoch[%d] cross_entropy=%g" % (epoch, sum_loss / num_batch))

correct_prediction = tf.equal(tf.argmax(fc1, 1), label)
accuracy = tf.reduce_mean(correct_prediction)
print(sess.run(accuracy, feed_dict={x: cifar.test.images, label: cifar.test.labels}))
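
The elementwise add in residual_factory only works because, with padding='SAME' and stride 1, the 3x3 convolutions preserve height and width; the two branches can therefore differ only in channel count, which is exactly what the 1x1 projection fixes. A shape-only sketch of that bookkeeping (conv_out_shape and residual_out_shape are hypothetical helpers written here for illustration, not tinyflow API):

def conv_out_shape(shape, out_filters):
    # 'SAME' padding, stride 1: height/width preserved, channels replaced
    n, c, h, w = shape
    return (n, out_filters, h, w)

def residual_out_shape(shape, in_filters, out_filters):
    branch = conv_out_shape(conv_out_shape(shape, out_filters), out_filters)
    if in_filters == out_filters:
        shortcut = shape                               # identity shortcut
    else:
        shortcut = conv_out_shape(shape, out_filters)  # 1x1 projection
    assert branch == shortcut  # elementwise add needs equal shapes
    return branch

print(residual_out_shape((100, 16, 32, 32), 16, 32))  # (100, 32, 32, 32)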

python/tinyflow/_base.py

Lines changed: 17 additions & 14 deletions
@@ -35,32 +35,35 @@
 # global list of all variable initializers
 _all_variable_inits = []
 
-def placeholder(dtype, shape=None, name=None):
-    v = symbol.placeholder(name=name, dtype=dtype)
-    return v
-
 
-def Variable(init, name=None):
-    if not isinstance(init, symbol.Symbol):
-        raise TypeError("Expect initialization expression to be Symbol")
+def Variable(init=None, name=None):
     name = NameManager.current.get(name, 'variable')
     v = symbol.Variable(name)
-    _all_variable_inits.append(symbol.assign(v, init))
+    if init is not None:
+        if not isinstance(init, symbol.Symbol):
+            raise TypeError("Expect initialization expression to be Symbol")
+        _all_variable_inits.append(symbol.assign(v, init))
     return v
 
 
-def group(*inputs):
-    x = _symbol_internal._nop()
-    x._add_control_deps(symbol.Group(inputs))
-    return x
-
-
 def initialize_all_variables():
     global _all_variable_inits
     init_op = group(*_all_variable_inits)
     _all_variable_inits = []
     return init_op
 
+
+def placeholder(dtype, name=None):
+    v = symbol.placeholder(name=name, dtype=dtype)
+    return v
+
+
+def group(*inputs):
+    x = _symbol_internal._nop()
+    x._add_control_deps(symbol.Group(inputs))
+    return x
+
+
 def gradients(ys, xs, grad_ys=None):
     if isinstance(ys, list):
         ys = symbol.Group(ys)
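
With this change, Variable no longer requires an initializer at construction: when init is omitted, no assign is queued on _all_variable_inits, so the weight can be initialized later, for example by the shape-inference loop in example/cifar_resnet.py. A minimal sketch of both call patterns, assuming Variable, normal, and initialize_all_variables are re-exported at the package top level as the example suggests:

import tinyflow as tf

# eager init: assign(v, init) is queued and later run by initialize_all_variables()
w = tf.Variable(tf.normal([10, 10], 0.01), name="w")

# deferred init: legal after this commit; nothing is queued for b here,
# so its initializer can be attached once its shape is known
b = tf.Variable(name="b")

init_op = tf.initialize_all_variables()  # groups only w's queued assign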

python/tinyflow/datasets.py

Lines changed: 63 additions & 0 deletions
@@ -2,6 +2,11 @@
 import numpy as np
 from collections import namedtuple
 from sklearn.datasets import fetch_mldata
+import sys
+import os
+from subprocess import call
+if sys.version_info < (3,):
+    import cPickle
+else:
+    import pickle as cPickle  # cPickle was removed in Python 3
+
 
 class ArrayPacker(object):
     """Dataset packer for iterator"""
@@ -41,3 +46,61 @@ def get_mnist(flatten=False, onehot=False):
     Y_test = Y[60000:]
     return MNISTData(train=ArrayPacker(X_train, Y_train),
                      test=ArrayPacker(X_test, Y_test))
+
+
+CIFAR10Data = namedtuple("CIFAR10Data", ["train", "test"])
+
+
+def load_batch(fpath, label_key='labels'):
+    f = open(fpath, 'rb')
+    if sys.version_info < (3,):
+        d = cPickle.load(f)
+    else:
+        d = cPickle.load(f, encoding="bytes")
+        # decode utf8 keys; iterate over a copy so the dict is not
+        # mutated while being traversed
+        for k, v in list(d.items()):
+            del d[k]
+            d[k.decode("utf8")] = v
+    f.close()
+    data = d["data"]
+    labels = d[label_key]
+
+    data = data.reshape(data.shape[0], 3, 32, 32).astype(np.float32)
+    labels = np.array(labels, dtype="float32")
+    return data, labels
+
+
+def get_cifar10(swap_axes=False):
+    path = "cifar-10-batches-py"
+    if not os.path.exists(path):
+        tar_file = "cifar-10-python.tar.gz"
+        origin = "http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
+        # download the archive only if it is not already present, then unpack
+        if not os.path.exists(tar_file):
+            call(["wget", origin])
+        call(["tar", "-xvf", tar_file])
+
+    nb_train_samples = 50000
+
+    X_train = np.zeros((nb_train_samples, 3, 32, 32), dtype="float32")
+    y_train = np.zeros((nb_train_samples,), dtype="float32")
+
+    # five training batches of 10000 images each
+    for i in range(1, 6):
+        fpath = os.path.join(path, 'data_batch_' + str(i))
+        data, labels = load_batch(fpath)
+        X_train[(i - 1) * 10000: i * 10000, :, :, :] = data
+        y_train[(i - 1) * 10000: i * 10000] = labels
+
+    fpath = os.path.join(path, 'test_batch')
+    X_test, y_test = load_batch(fpath)
+
+    if swap_axes:
+        # move the channel axis last: (N, 3, 32, 32) -> (N, 32, 32, 3)
+        X_train = np.swapaxes(X_train, 1, 3)
+        X_test = np.swapaxes(X_test, 1, 3)
+
+    return CIFAR10Data(train=ArrayPacker(X_train, y_train),
+                       test=ArrayPacker(X_test, y_test))
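
A quick usage sketch (assumes the CIFAR-10 download and unpacking succeed; next_batch comes from ArrayPacker): the default layout is channel-first, and swap_axes=True moves channels last.

from tinyflow.datasets import get_cifar10

cifar = get_cifar10()                    # images as (N, 3, 32, 32) float32
batch_xs, batch_ys = cifar.train.next_batch(100)
print(batch_xs.shape, batch_ys.shape)    # expected: (100, 3, 32, 32) (100,)

cifar_hwc = get_cifar10(swap_axes=True)  # images as (N, 32, 32, 3)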

src/op_nn.cc

Lines changed: 72 additions & 0 deletions
@@ -178,6 +178,38 @@ NNVM_REGISTER_OP(linear)
 .set_attr<FInferShape>("FInferShape", LinearShape);
 
 
+struct PadParam : public dmlc::Parameter<PadParam> {
+  uint32_t dim;
+  int pad;
+
+  DMLC_DECLARE_PARAMETER(PadParam) {
+    DMLC_DECLARE_FIELD(dim).set_default(0);
+    DMLC_DECLARE_FIELD(pad).set_default(0);
+  }
+};
+DMLC_REGISTER_PARAMETER(PadParam);
+
+inline bool PadShape(const NodeAttrs& attrs,
+                     std::vector<TShape> *ishape,
+                     std::vector<TShape> *oshape) {
+  const auto& param = dmlc::get<PadParam>(attrs.parsed);
+  if (ishape->at(0).ndim() == 0) {
+    return false;
+  }
+  TShape out = ishape->at(0);
+  out[param.dim] += abs(param.pad);
+  oshape->at(0) = out;
+  return true;
+}
+
+NNVM_REGISTER_OP(pad)
+.describe("pads a tensor")
+.set_num_inputs(1)
+.include("nn_module")
+.set_attr_parser(ParamParser<PadParam>)
+.set_attr<FInferShape>("FInferShape", PadShape);
+
+
 struct ConvPoolParam : public dmlc::Parameter<ConvPoolParam> {
   TShape ksize;
   TShape strides;
@@ -263,6 +295,46 @@ NNVM_REGISTER_OP(max_pool)
 .set_attr<FInferShape>("FInferShape", ConvPoolShape);
 
 
+NNVM_REGISTER_OP(avg_pool)
+.describe("Avg pooling")
+.set_num_inputs(1)
+.set_attr_parser(ParamParser<ConvPoolParam>)
+.include("nn_module")
+.set_attr<FInferShape>("FInferShape", ConvPoolShape);
+
+
+struct BatchNormalizationParam : public dmlc::Parameter<BatchNormalizationParam> {
+  std::string name;
+  DMLC_DECLARE_PARAMETER(BatchNormalizationParam) {
+    DMLC_DECLARE_FIELD(name).set_default("batch_normalization");
+  }
+};
+DMLC_REGISTER_PARAMETER(BatchNormalizationParam);
+
+inline bool BatchNormalizationShape(const NodeAttrs& attrs,
+                                    std::vector<TShape> *ishape,
+                                    std::vector<TShape> *oshape) {
+  if (ishape->at(0).ndim() == 0) return false;
+  const TShape& in = ishape->at(0);
+  CHECK_EQ(in.ndim(), 4);
+  TShape mean = TShape{in[1]};
+  SHAPE_ASSIGN(ishape->at(1), mean);
+  SHAPE_ASSIGN(ishape->at(2), mean);
+  oshape->at(0) = in;
+  return true;
+}
+
+NNVM_REGISTER_OP(batch_normalization)
+.describe("batch normalization")
+.set_num_inputs(3)
+.set_attr<FListInputNames>("FListInputNames", [](const NodeAttrs& attrs) {
+  return std::vector<std::string>{"data", "gamma", "beta"};
+})
+.set_attr_parser(ParamParser<BatchNormalizationParam>)
+.include("nn_module")
+.set_attr<FInferShape>("FInferShape", BatchNormalizationShape);
+
+
 NNVM_REGISTER_OP(mean_sparse_softmax_cross_entropy_with_logits)
 .describe("Softmax cross entropy given logit and label")
 .set_num_inputs(2)
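
In plain terms, PadShape grows the padded dimension by abs(pad), and BatchNormalizationShape pins gamma and beta to one value per channel of a 4-D NCHW input while the output keeps the input shape. A rough Python rendering of both rules (a sketch for intuition, not the nnvm API; shapes are plain tuples here):

def pad_shape(in_shape, dim=0, pad=0):
    # mirror of PadShape: the chosen dimension grows by |pad|
    out = list(in_shape)
    out[dim] += abs(pad)
    return tuple(out)

def batch_normalization_shape(data_shape):
    # mirror of BatchNormalizationShape: data must be 4-D (NCHW);
    # gamma and beta are inferred as one value per channel (in[1])
    assert len(data_shape) == 4
    per_channel = (data_shape[1],)
    return data_shape, per_channel, per_channel  # output, gamma, beta

print(pad_shape((100, 16, 32, 32), dim=2, pad=-2))   # (100, 16, 34, 32)
print(batch_normalization_shape((100, 16, 32, 32)))  # ((100, 16, 32, 32), (16,), (16,))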

src/op_tensor.cc

Lines changed: 12 additions & 0 deletions
@@ -89,6 +89,18 @@ NNVM_REGISTER_OP(equal)
 .set_attr<FInferShape>("FInferShape", SameShape);
 
 
+NNVM_REGISTER_OP(__ewise_sum__)
+.describe("ewise sum")
+.set_num_inputs(nnvm::kVarg)
+.set_attr<FInplaceOption>("FInplaceOption", InplaceIn0Out0)
+.set_attr<FInferShape>("FInferShape", SameShape)
+.set_attr<FGradient>(
+    "FGradient", [](const NodePtr& n,
+                    const std::vector<NodeEntry>& ograds) {
+      return std::vector<NodeEntry>(n->num_inputs(), ograds[0]);
+    });
+
+
 NNVM_REGISTER_OP(__add_symbol__)
 .describe("add two data together")
 .set_num_inputs(2)
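
The registered gradient simply fans the output gradient out to every input: since d(x1 + ... + xk)/dxi = 1, each input receives ograds[0] unchanged. A hedged NumPy check of that identity (a standalone sketch, independent of nnvm):

import numpy as np

def ewise_sum_grad(ograd, num_inputs):
    # mirror of the FGradient above: every input gets ograd unchanged
    return [ograd] * num_inputs

xs = [np.random.randn(2, 3) for _ in range(4)]
ograd = np.ones((2, 3))                 # gradient flowing into the sum's output
grads = ewise_sum_grad(ograd, len(xs))
assert all((g == ograd).all() for g in grads)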
