Support lstm, bidirectional-lstm, gru models

tobegit3hub · tobegit3hub · commit 463cad06924f · 2018-05-09T11:38:50.000+08:00
diff --git a/dense_classifier.py b/dense_classifier.py
@@ -31,6 +31,9 @@ def define_flags():
   flags.DEFINE_boolean("resume_from_checkpoint", True, "Resume or not")
   flags.DEFINE_string("scenario", "classification",
                       "Support classification, regression")
+  flags.DEFINE_string(
+      "loss", "sparse_cross_entropy",
+      "Support sparse_cross_entropy, cross_entropy, mean_square")
   flags.DEFINE_integer("feature_size", 9, "Number of feature size")
   flags.DEFINE_integer("label_size", 2, "Number of label size")
   flags.DEFINE_string("file_format", "tfrecords", "Support tfrecords, csv")
@@ -47,8 +50,10 @@ def define_flags():
   flags.DEFINE_string("optimizer", "adagrad",
                       "Support sgd, adadelta, adagrad, adam, ftrl, rmsprop")
   flags.DEFINE_float("learning_rate", 0.01, "Learning rate")
-  flags.DEFINE_string("model", "dnn",
-                      "Support dnn, lr, wide_and_deep, customized, cnn")
+  flags.DEFINE_string(
+      "model", "dnn",
+      "Support dnn, lr, wide_and_deep, customized, cnn, lstm, bidirectional_lstm, gru"
+  )
   flags.DEFINE_string("dnn_struct", "128 32 8", "DNN struct")
   flags.DEFINE_integer("epoch_number", 100, "Number of epoches")
   flags.DEFINE_integer("train_batch_size", 64, "Batch size")
@@ -71,11 +76,17 @@ def define_flags():
   # Check parameters
   assert (FLAGS.mode in ["train", "inference", "savedmodel"])
   assert (FLAGS.scenario in ["classification", "regression"])
+  assert (FLAGS.loss in [
+      "sparse_cross_entropy", "cross_entropy", "mean_square"
+  ])
   assert (FLAGS.file_format in ["tfrecords", "csv"])
   assert (FLAGS.optimizer in [
       "sgd", "adadelta", "adagrad", "adam", "ftrl", "rmsprop"
   ])
-  assert (FLAGS.model in ["dnn", "lr", "wide_and_deep", "customized", "cnn"])
+  assert (FLAGS.model in [
+      "dnn", "lr", "wide_and_deep", "customized", "cnn", "customized_cnn",
+      "lstm", "bidirectional_lstm", "gru"
+  ])
 
   # Print flags
   parameter_value_map = {}
@@ -206,36 +217,22 @@ def parse_tfrecords_function(example_proto):
   return parsed_features["features"], parsed_features["label"]
 
 
-# TODO: Change for dataset api
-def read_and_decode_csv_old(filename_queue):
-  # Notice that it supports label in the last column only
-  reader = tf.TextLineReader()
-  key, value = reader.read(filename_queue)
-  record_defaults = [[1.0] for i in range(FLAGS.feature_size)] + [[0]]
-  columns = tf.decode_csv(value, record_defaults=record_defaults)
-  label = columns[-1]
-  features = tf.stack(columns[0:-1])
-  return label, features
-
-
 def parse_csv_function(line):
-  # Metadata describing the text columns
-  COLUMNS = [
-      "feature0", "feature1", "feature2", "feature3", "feature4", "feature5",
-      "feature6", "feature7", "feature8", "label"
-  ]
+  """
+  Decode CSV for Dataset.
+  
+  Args:
+    line: One line data of the CSV.
+  
+  Return:
+    The op of features and labels
+  """
+
   FIELD_DEFAULTS = [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0],
                     [0.0], [0]]
 
-  # Decode the line into its fields
   fields = tf.decode_csv(line, FIELD_DEFAULTS)
 
-  # Pack the result into a dictionary
-  #features = dict(zip(COLUMNS,fields))
-
-  # Separate the label from the features
-  #label = features.pop("label")
-
   label = fields[-1]
   label = tf.cast(label, tf.int64)
   features = tf.stack(fields[0:-1])
@@ -266,6 +263,18 @@ def inference(inputs, input_units, output_units, is_train=True):
   elif FLAGS.model == "cnn":
     return model.cnn_inference(inputs, input_units, output_units, is_train,
                                FLAGS)
+  elif FLAGS.model == "customized_cnn":
+    return model.customized_cnn_inference(inputs, input_units, output_units,
+                                          is_train, FLAGS)
+  elif FLAGS.model == "lstm":
+    return model.lstm_inference(inputs, input_units, output_units, is_train,
+                                FLAGS)
+  elif FLAGS.model == "bidirectional_lstm":
+    return model.bidirectional_lstm_inference(inputs, input_units,
+                                              output_units, is_train, FLAGS)
+  elif FLAGS.model == "gru":
+    return model.gru_inference(inputs, input_units, output_units, is_train,
+                               FLAGS)
 
 
 logging.basicConfig(level=logging.INFO)
@@ -337,11 +346,19 @@ def main():
   output_units = FLAGS.label_size
   logits = inference(train_features_op, input_units, output_units, True)
 
-  if FLAGS.scenario == "classification":
+  if FLAGS.loss == "sparse_cross_entropy":
     cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
         logits=logits, labels=train_label_op)
     loss = tf.reduce_mean(cross_entropy, name="loss")
-  elif FLAGS.scenario == "regression":
+  elif FLAGS.loss == "cross_entropy":
+
+    #train_label_op =
+    #validation_label_op =
+
+    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
+        logits=logits, labels=train_label_op)
+    loss = tf.reduce_mean(cross_entropy, name="loss")
+  elif FLAGS.loss == "mean_square":
     msl = tf.square(logits - train_label_op, name="msl")
     loss = tf.reduce_mean(msl, name="loss")
 
diff --git a/model.py b/model.py
@@ -129,6 +129,44 @@ def cnn_inference(inputs, input_units, output_units, is_train=True,
     Define the CNN model.
     """
 
+  # [BATCH_SIZE, 9] -> [BATCH_SIZE, 3, 3, 1]
+  inputs = tf.reshape(inputs, [-1, 3, 3, 1])
+
+  # [BATCH_SIZE, 3, 3, 1] -> [BATCH_SIZE, 3, 3, 8]
+  with tf.variable_scope("conv_0"):
+    weights = tf.get_variable(
+        "weights", [3, 3, 1, 8], initializer=tf.random_normal_initializer())
+    bias = tf.get_variable(
+        "bias", [8], initializer=tf.random_normal_initializer())
+
+    layer = tf.nn.conv2d(inputs, weights, strides=[1, 1, 1, 1], padding="SAME")
+    layer = tf.nn.bias_add(layer, bias)
+    layer = tf.nn.relu(layer)
+
+  # [BATCH_SIZE, 3, 3, 8] -> [BATCH_SIZE, 3 * 3 * 8]
+  layer = tf.reshape(layer, [-1, 3 * 3 * 8])
+
+  # [BATCH_SIZE, 3 * 3 * 8] -> [BATCH_SIZE, LABEL_SIZE]
+  with tf.variable_scope("output_layer"):
+    weights = tf.get_variable(
+        "weights", [3 * 3 * 8, FLAGS.label_size],
+        initializer=tf.random_normal_initializer())
+    bias = tf.get_variable(
+        "bias", [FLAGS.label_size], initializer=tf.random_normal_initializer())
+    layer = tf.add(tf.matmul(layer, weights), bias)
+
+  return layer
+
+
+def customized_cnn_inference(inputs,
+                             input_units,
+                             output_units,
+                             is_train=True,
+                             FLAGS=None):
+  """
+    Define the CNN model.
+    """
+
   # TODO: Change if validate_batch_size is different
   # [BATCH_SIZE, 512 * 512 * 1] -> [BATCH_SIZE, 512, 512, 1]
   inputs = tf.reshape(inputs, [FLAGS.train_batch_size, 512, 512, 1])
@@ -187,6 +225,96 @@ def cnn_inference(inputs, input_units, output_units, is_train=True,
   return layer
 
 
+def lstm_inference(inputs,
+                   input_units,
+                   output_units,
+                   is_train=True,
+                   FLAGS=None):
+  """Define the LSTM model; returns logits [BATCH_SIZE, output_units]."""
+  RNN_HIDDEN_UNITS = 128
+  timesteps = 3
+  number_input = 3
+
+  weights = tf.Variable(tf.random_normal([RNN_HIDDEN_UNITS, output_units]))
+  biases = tf.Variable(tf.random_normal([output_units]))
+
+  # [BATCH_SIZE, 9] -> [BATCH_SIZE, 3, 3]; assumes 9 features per example
+  x = tf.reshape(inputs, [-1, timesteps, number_input])
+
+  # [BATCH_SIZE, 3, 3] -> list of 3 tensors of [BATCH_SIZE, 3], one per step
+  x = tf.unstack(x, timesteps, 1)
+
+  # output size is 128, state size is (c=128, h=128)
+  lstm_cell = tf.contrib.rnn.BasicLSTMCell(RNN_HIDDEN_UNITS, forget_bias=1.0)
+
+  # outputs is a list of 3 tensors, each [BATCH_SIZE, 128]
+  outputs, states = tf.contrib.rnn.static_rnn(lstm_cell, x, dtype=tf.float32)
+
+  # last step: [BATCH_SIZE, 128] x [128, output_units] -> logits
+  layer = tf.matmul(outputs[-1], weights) + biases
+  return layer
+
+
+def bidirectional_lstm_inference(inputs,
+                                 input_units,
+                                 output_units,
+                                 is_train=True,
+                                 FLAGS=None):
+  """Define the bidirectional LSTM model; returns logits per example."""
+  RNN_HIDDEN_UNITS = 128
+  timesteps = 3
+  number_input = 3
+
+  weights = tf.Variable(tf.random_normal([RNN_HIDDEN_UNITS, output_units]))
+  biases = tf.Variable(tf.random_normal([output_units]))
+
+  # [BATCH_SIZE, 9] -> [BATCH_SIZE, 3, 3]; assumes 9 features per example
+  x = tf.reshape(inputs, [-1, timesteps, number_input])
+
+  # [BATCH_SIZE, 3, 3] -> list of 3 tensors of [BATCH_SIZE, 3], one per step
+  x = tf.unstack(x, timesteps, 1)
+
+  # Half the units per direction; "//" keeps num_units an int on Python 3
+  fw_lstm_cell = tf.contrib.rnn.BasicLSTMCell(
+      RNN_HIDDEN_UNITS // 2, forget_bias=1.0)
+  bw_lstm_cell = tf.contrib.rnn.BasicLSTMCell(
+      RNN_HIDDEN_UNITS // 2, forget_bias=1.0)
+
+  outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn(
+      fw_lstm_cell, bw_lstm_cell, x, dtype=tf.float32)
+
+  # outputs[-1] is the fw/bw concat, [BATCH_SIZE, 128] -> logits
+  layer = tf.matmul(outputs[-1], weights) + biases
+  return layer
+
+
+def gru_inference(inputs, input_units, output_units, is_train=True,
+                  FLAGS=None):
+  """Define the GRU model; returns logits [BATCH_SIZE, output_units]."""
+  RNN_HIDDEN_UNITS = 128
+  timesteps = 3
+  number_input = 3
+
+  weights = tf.Variable(tf.random_normal([RNN_HIDDEN_UNITS, output_units]))
+  biases = tf.Variable(tf.random_normal([output_units]))
+
+  # [BATCH_SIZE, 9] -> [BATCH_SIZE, 3, 3]; assumes 9 features per example
+  x = tf.reshape(inputs, [-1, timesteps, number_input])
+
+  # [BATCH_SIZE, 3, 3] -> list of 3 tensors of [BATCH_SIZE, 3], one per step
+  x = tf.unstack(x, timesteps, 1)
+
+  # output size is 128; a GRU keeps one 128-wide state (no separate c/h)
+  gru_cell = tf.contrib.rnn.GRUCell(RNN_HIDDEN_UNITS)
+
+  # outputs is a list of 3 tensors, each [BATCH_SIZE, 128]
+  outputs, _ = tf.contrib.rnn.static_rnn(gru_cell, x, dtype=tf.float32)
+
+  # last step: [BATCH_SIZE, 128] x [128, output_units] -> logits
+  layer = tf.matmul(outputs[-1], weights) + biases
+  return layer
+
+
 def compute_softmax_and_accuracy(logits, labels):
   """
   Compute the softmax and accuracy of the logits and labels.
@@ -227,4 +355,4 @@ def compute_auc(softmax_op, label_op, label_size):
   new_batch_labels = tf.sparse_to_dense(concated, outshape, 1.0, 0.0)
   _, auc_op = tf.contrib.metrics.streaming_auc(softmax_op, new_batch_labels)
 
-  return auc_op
+  return auc_op