Support scenario like regression

tobegit3hub · tobegit3hub · commit f4385ba09b3b · 2017-09-13T16:11:46.000+08:00
diff --git a/README.md b/README.md
@@ -26,11 +26,12 @@ Following are the supported features.
   - [x] Validate acc/auc
   - [x] Inference online
   - [x] Inference offline
-- [x] Network Model
+- [x] Network Models
   - [x] Logistic regression
   - [x] Deep neural network
   - [x] Convolution neural network
   - [x] Wide and deep model
+  - [x] Regression model
   - [x] Customized models
 - [x] Others
   - [x] Checkpoint
@@ -95,6 +96,12 @@ If you want to use CNN model, try this command.
 ./dense_classifier.py --train_file ./data/lung/fa7a21165ae152b13def786e6afc3edf.dcm.csv.tfrecords --validate_file ./data/lung/fa7a21165ae152b13def786e6afc3edf.dcm.csv.tfrecords --feature_size 262144 --label_size 2 --batch_size 2 --validate_batch_size 2 --epoch_number -1 --model cnn
 ```
 
+For the [Boston housing](./data/boston_housing/) dataset (a regression scenario), try this command.
+
+```
+./dense_classifier.py --train_file ./data/boston_housing/train.csv.tfrecords --validate_file ./data/boston_housing/train.csv.tfrecords --feature_size 13 --label_size 1 --scenario regression  --batch_size 1 --validate_batch_size 1
+```
+
 ### Export The Model
 
 After training, it will export the model automatically. Or you can export manually.
diff --git a/data/boston_housing/README.md b/data/boston_housing/README.md
@@ -0,0 +1,4 @@
+
+## Data
+
+The data files come from <https://inclass.kaggle.com/c/boston-housing>.
diff --git a/data/boston_housing/generate_csv_tfrecords.py b/data/boston_housing/generate_csv_tfrecords.py
@@ -11,7 +11,7 @@ def generate_tfrecords(input_filename, output_filename):
   index = 0
   for line in open(input_filename, "r"):
     index += 1
-    
+
     # Ignore the first line
     if index == 1:
       continue
@@ -20,17 +20,18 @@ def generate_tfrecords(input_filename, output_filename):
     label = float(data[14])
     features = [float(i) for i in data[1:14]]
 
-    example = tf.train.Example(features=tf.train.Features(feature={
-        "label":
-        tf.train.Feature(float_list=tf.train.FloatList(value=[label])),
-        "features":
-        tf.train.Feature(float_list=tf.train.FloatList(value=features)),
-    }))
+    example = tf.train.Example(features=tf.train.Features(
+        feature={
+            "label":
+            tf.train.Feature(float_list=tf.train.FloatList(value=[label])),
+            "features":
+            tf.train.Feature(float_list=tf.train.FloatList(value=features)),
+        }))
     writer.write(example.SerializeToString())
 
   writer.close()
-  print("Successfully convert {} to {}".format(input_filename,
-                                               output_filename))
+  print(
+      "Successfully convert {} to {}".format(input_filename, output_filename))
 
 
 def main():
diff --git a/dense_classifier.py b/dense_classifier.py
@@ -42,6 +42,8 @@
                     "The path of checkpoint")
 flags.DEFINE_string("output_path", "./tensorboard/",
                     "The path of tensorboard event files")
+flags.DEFINE_string("scenario", "classification",
+                    "Support classification and regression")
 flags.DEFINE_string("model", "dnn", "Support dnn, lr, wide_and_deep")
 flags.DEFINE_string("model_network", "128 32 8", "The neural network of model")
 flags.DEFINE_boolean("enable_bn", False, "Enable batch normalization or not")
@@ -86,6 +88,7 @@ def main():
   MIN_AFTER_DEQUEUE = FLAGS.min_after_dequeue
   BATCH_CAPACITY = BATCH_THREAD_NUMBER * FLAGS.batch_size + MIN_AFTER_DEQUEUE
   MODE = FLAGS.mode
+  SCENARIO = FLAGS.scenario
   MODEL = FLAGS.model
   CHECKPOINT_PATH = FLAGS.checkpoint_path
   if not CHECKPOINT_PATH.startswith("fds://") and not os.path.exists(
@@ -311,10 +314,19 @@ def inference(inputs, is_train=True):
   logging.info("Use the model: {}, model network: {}".format(
       MODEL, FLAGS.model_network))
   logits = inference(batch_features, True)
-  batch_labels = tf.to_int64(batch_labels)
-  cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
-      logits=logits, labels=batch_labels)
-  loss = tf.reduce_mean(cross_entropy, name="loss")
+
+  if SCENARIO == "classification":
+    batch_labels = tf.to_int64(batch_labels)
+    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
+        logits=logits, labels=batch_labels)
+    loss = tf.reduce_mean(cross_entropy, name="loss")
+  elif SCENARIO == "regression":
+    msl = tf.square(logits - batch_labels, name="msl")
+    loss = tf.reduce_mean(msl, name="loss")
+  else:
+    logging.error("Unknow scenario: {}".format(SCENARIO))
+    return
+
   global_step = tf.Variable(0, name="global_step", trainable=False)
   if FLAGS.enable_lr_decay:
     logging.info(
@@ -332,6 +344,10 @@ def inference(inputs, is_train=True):
   train_op = optimizer.minimize(loss, global_step=global_step)
   tf.get_variable_scope().reuse_variables()
 
+  # Avoid error when not using acc and auc op
+  if SCENARIO == "regression":
+    batch_labels = tf.to_int64(batch_labels)
+
   # Define accuracy op for train data
   train_accuracy_logits = inference(batch_features, False)
   train_softmax = tf.nn.softmax(train_accuracy_logits)
@@ -395,10 +411,11 @@ def inference(inputs, is_train=True):
   # Initialize saver and summary
   saver = tf.train.Saver()
   tf.summary.scalar("loss", loss)
-  tf.summary.scalar("train_accuracy", train_accuracy)
-  tf.summary.scalar("train_auc", train_auc)
-  tf.summary.scalar("validate_accuracy", validate_accuracy)
-  tf.summary.scalar("validate_auc", validate_auc)
+  if SCENARIO == "classification":
+    tf.summary.scalar("train_accuracy", train_accuracy)
+    tf.summary.scalar("train_auc", train_auc)
+    tf.summary.scalar("validate_accuracy", validate_accuracy)
+    tf.summary.scalar("validate_auc", validate_auc)
   summary_op = tf.summary.merge_all()
   init_op = [
       tf.global_variables_initializer(),
@@ -427,17 +444,24 @@ def inference(inputs, is_train=True):
 
             # Print state while training
             if step % FLAGS.steps_to_validate == 0:
-              loss_value, train_accuracy_value, train_auc_value, validate_accuracy_value, validate_auc_value, summary_value = sess.run(
-                  [
-                      loss, train_accuracy, train_auc, validate_accuracy,
-                      validate_auc, summary_op
-                  ])
-              end_time = datetime.datetime.now()
-              logging.info(
-                  "[{}] Step: {}, loss: {}, train_acc: {}, train_auc: {}, valid_acc: {}, valid_auc: {}".
-                  format(end_time - start_time, step, loss_value,
-                         train_accuracy_value, train_auc_value,
-                         validate_accuracy_value, validate_auc_value))
+              if SCENARIO == "classification":
+                loss_value, train_accuracy_value, train_auc_value, validate_accuracy_value, validate_auc_value, summary_value = sess.run(
+                    [
+                        loss, train_accuracy, train_auc, validate_accuracy,
+                        validate_auc, summary_op
+                    ])
+                end_time = datetime.datetime.now()
+                logging.info(
+                    "[{}] Step: {}, loss: {}, train_acc: {}, train_auc: {}, valid_acc: {}, valid_auc: {}".
+                    format(end_time - start_time, step, loss_value,
+                           train_accuracy_value, train_auc_value,
+                           validate_accuracy_value, validate_auc_value))
+              elif SCENARIO == "regression":
+                loss_value, summary_value = sess.run([loss, summary_op])
+                end_time = datetime.datetime.now()
+                logging.info("[{}] Step: {}, loss: {}".format(
+                    end_time - start_time, step, loss_value))
+
               writer.add_summary(summary_value, step)
               saver.save(sess, CHECKPOINT_FILE, global_step=step)
               start_time = end_time

-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
++
 +## Data
++
 +The files are from https://inclass.kaggle.com/c/boston-housing .