Commit b13b9ac

add bilstm examples
1 parent 3ef6826 commit b13b9ac

File tree

14.RNN&LSTM/RNN.py
14.RNN&LSTM/bilstm_mnist.py
14.RNN&LSTM/bilstm_mnist2.py

3 files changed, +326 -4 lines changed

14.RNN&LSTM/RNN.py

Lines changed: 121 additions & 4 deletions
@@ -1,6 +1,123 @@
-sum=0
-for i in range(1,101):
-    sum+=i*(i+1)*(2*i+3)
+""" Bi-directional Recurrent Neural Network.
+A Bi-directional Recurrent Neural Network (LSTM) implementation example using
+TensorFlow library. This example is using the MNIST database of handwritten
+digits (http://yann.lecun.com/exdb/mnist/)
+Links:
+    [Long Short Term Memory](http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf)
+    [MNIST Dataset](http://yann.lecun.com/exdb/mnist/).
+Author: Aymeric Damien
+Project: https://github.com/aymericdamien/TensorFlow-Examples/
+"""

-print(sum)
+from __future__ import print_function

+import tensorflow as tf
+from tensorflow.contrib import rnn
+import numpy as np
+
+# Import MNIST data
+from tensorflow.examples.tutorials.mnist import input_data
+mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
+
+'''
+To classify images using a bidirectional recurrent neural network, we consider
+every image row as a sequence of pixels. Because MNIST image shape is 28*28px,
+we will then handle 28 sequences of 28 steps for every sample.
+'''
+
+# Training Parameters
+learning_rate = 0.001
+training_steps = 10000
+batch_size = 128
+display_step = 200
+
+# Network Parameters
+num_input = 28  # MNIST data input (img shape: 28*28)
+timesteps = 28  # timesteps
+num_hidden = 128  # hidden layer num of features
+num_classes = 10  # MNIST total classes (0-9 digits)
+
+# tf Graph input
+X = tf.placeholder("float", [None, timesteps, num_input])
+Y = tf.placeholder("float", [None, num_classes])
+
+# Define weights
+weights = {
+    # Hidden layer weights => 2*n_hidden because of forward + backward cells
+    'out': tf.Variable(tf.random_normal([2*num_hidden, num_classes]))
+}
+biases = {
+    'out': tf.Variable(tf.random_normal([num_classes]))
+}
+
+
+def BiRNN(x, weights, biases):
+
+    # Prepare data shape to match `rnn` function requirements
+    # Current data input shape: (batch_size, timesteps, n_input)
+    # Required shape: 'timesteps' tensors list of shape (batch_size, num_input)
+
+    # Unstack to get a list of 'timesteps' tensors of shape (batch_size, num_input)
+    x = tf.unstack(x, timesteps, 1)
+
+    # Define lstm cells with tensorflow
+    # Forward direction cell
+    lstm_fw_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
+    # Backward direction cell
+    lstm_bw_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
+
+    # Get lstm cell output
+    try:
+        outputs, _, _ = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
+                                                     dtype=tf.float32)
+    except Exception:  # Old TensorFlow version only returns outputs not states
+        outputs = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
+                                               dtype=tf.float32)
+
+    # Linear activation, using rnn inner loop last output
+    return tf.matmul(outputs[-1], weights['out']) + biases['out']
+
+logits = BiRNN(X, weights, biases)
+prediction = tf.nn.softmax(logits)
+
+# Define loss and optimizer
+loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
+    logits=logits, labels=Y))
+optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
+train_op = optimizer.minimize(loss_op)
+
+# Evaluate model (with test logits, for dropout to be disabled)
+correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
+accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
+
+# Initialize the variables (i.e. assign their default value)
+init = tf.global_variables_initializer()
+
+# Start training
+with tf.Session() as sess:
+
+    # Run the initializer
+    sess.run(init)
+
+    for step in range(1, training_steps+1):
+        batch_x, batch_y = mnist.train.next_batch(batch_size)
+        # Reshape data to get 28 seq of 28 elements
+        batch_x = batch_x.reshape((batch_size, timesteps, num_input))
+        # Run optimization op (backprop)
+        sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
+        if step % display_step == 0 or step == 1:
+            # Calculate batch loss and accuracy
+            loss, acc = sess.run([loss_op, accuracy], feed_dict={X: batch_x,
+                                                                 Y: batch_y})
+            print("Step " + str(step) + ", Minibatch Loss= " + \
+                  "{:.4f}".format(loss) + ", Training Accuracy= " + \
+                  "{:.3f}".format(acc))
+
+    print("Optimization Finished!")
+
+    # Calculate accuracy for 128 mnist test images
+    test_len = 128
+    test_data = mnist.test.images[:test_len].reshape((-1, timesteps, num_input))
+    test_label = mnist.test.labels[:test_len]
+    print("Testing Accuracy:", \
+        sess.run(accuracy, feed_dict={X: test_data, Y: test_label}))
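
For reference, the same idea, treating each 28x28 MNIST image as 28 timesteps of 28 features and classifying it with a bidirectional LSTM, can be written much more compactly against the tf.keras API. This is only an illustrative sketch under the assumption that a TensorFlow 2.x installation is available; it is not part of this commit, and the 128-unit layer simply mirrors num_hidden above.

# Illustrative sketch only (assumes TensorFlow 2.x with tf.keras; not part of this commit).
import tensorflow as tf

# Load MNIST and scale pixels to [0, 1]; each image becomes 28 timesteps of 28 features.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(28, 28)),
    # Forward and backward LSTMs; their final outputs are concatenated (2*128 features).
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128)),
    tf.keras.layers.Dense(10, activation="softmax"),
])
model.compile(optimizer="adam",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
model.fit(x_train, y_train, batch_size=128, epochs=1,
          validation_data=(x_test, y_test))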

14.RNN&LSTM/bilstm_mnist.py

Lines changed: 100 additions & 0 deletions
@@ -0,0 +1,100 @@
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.contrib.rnn as rnn
import matplotlib.pyplot as plt


TIME_STEPS=28
BATCH_SIZE=128
HIDDEN_UNITS1=30
HIDDEN_UNITS=10
LEARNING_RATE=0.001
EPOCH=50

TRAIN_EXAMPLES=42000
TEST_EXAMPLES=28000

#------------------------------------Load Data-----------------------------------------------#
# load the Kaggle-format MNIST csv files
train_frame = pd.read_csv("../Mnist/train.csv")
test_frame = pd.read_csv("../Mnist/test.csv")

# pop the labels and one-hot encode them
train_labels_frame = train_frame.pop("label")

# get values
# one-hot on labels
X_train = train_frame.astype(np.float32).values
y_train=pd.get_dummies(data=train_labels_frame).values
X_test = test_frame.astype(np.float32).values

# reshape to (batch, time_steps, input_size)
X_train=np.reshape(X_train,newshape=(-1,28,28))
X_test=np.reshape(X_test,newshape=(-1,28,28))
#print(X_train.shape)
#print(y_train.shape)
#print(X_test.shape)

#-----------------------------------------------------------------------------------------------------#


#--------------------------------------Define Graph---------------------------------------------------#
graph=tf.Graph()
with graph.as_default():

    #------------------------------------construct LSTM------------------------------------------#
    # placeholders
    X_p=tf.placeholder(dtype=tf.float32,shape=(None,TIME_STEPS,28),name="input_placeholder")
    y_p=tf.placeholder(dtype=tf.float32,shape=(None,10),name="pred_placeholder")

    # lstm instances (one cell per direction)
    lstm_forward=rnn.BasicLSTMCell(num_units=HIDDEN_UNITS)
    lstm_backward=rnn.BasicLSTMCell(num_units=HIDDEN_UNITS)

    outputs,states=tf.nn.bidirectional_dynamic_rnn(
        cell_fw=lstm_forward,
        cell_bw=lstm_backward,
        inputs=X_p,
        dtype=tf.float32
    )

    outputs_fw=outputs[0]
    outputs_bw = outputs[1]
    # sum the two directions' outputs at the last time step and use that 10-unit vector as logits
    h=outputs_fw[:,-1,:]+outputs_bw[:,-1,:]
    # print(h.shape)
    #----------------------------------------------------------------------------------------------#

    #---------------------------------define loss and optimizer----------------------------------#
    cross_loss=tf.losses.softmax_cross_entropy(onehot_labels=y_p,logits=h)
    #print(cross_loss.shape)

    correct_prediction = tf.equal(tf.argmax(h, 1), tf.argmax(y_p, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    optimizer=tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss=cross_loss)

    init=tf.global_variables_initializer()


#-------------------------------------------Define Session---------------------------------------#
with tf.Session(graph=graph) as sess:
    sess.run(init)
    for epoch in range(1,EPOCH+1):
        #results = np.zeros(shape=(TEST_EXAMPLES, 10))
        train_losses=[]
        accus=[]
        #test_losses=[]
        print("epoch:",epoch)
        for j in range(TRAIN_EXAMPLES//BATCH_SIZE):
            _,train_loss,accu=sess.run(
                fetches=(optimizer,cross_loss,accuracy),
                feed_dict={
                    X_p:X_train[j*BATCH_SIZE:(j+1)*BATCH_SIZE],
                    y_p:y_train[j*BATCH_SIZE:(j+1)*BATCH_SIZE]
                }
            )
            train_losses.append(train_loss)
            accus.append(accu)
        print("average training loss:", sum(train_losses) / len(train_losses))
        print("accuracy:",sum(accus)/len(accus))

14.RNN&LSTM/bilstm_mnist2.py

Lines changed: 105 additions & 0 deletions
@@ -0,0 +1,105 @@
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.contrib.rnn as rnn
import matplotlib.pyplot as plt


TIME_STEPS=28
BATCH_SIZE=128
HIDDEN_UNITS1=30
HIDDEN_UNITS=10
LEARNING_RATE=0.001
EPOCH=50

TRAIN_EXAMPLES=42000
TEST_EXAMPLES=28000

#------------------------------------Load Data-----------------------------------------------#
# load the Kaggle-format MNIST csv files
train_frame = pd.read_csv("../Mnist/train.csv")
test_frame = pd.read_csv("../Mnist/test.csv")

# pop the labels and one-hot encode them
train_labels_frame = train_frame.pop("label")

# get values
# one-hot on labels
X_train = train_frame.astype(np.float32).values
y_train=pd.get_dummies(data=train_labels_frame).values
X_test = test_frame.astype(np.float32).values

# reshape to (batch, time_steps, input_size)
X_train=np.reshape(X_train,newshape=(-1,28,28))
X_test=np.reshape(X_test,newshape=(-1,28,28))
#print(X_train.shape)
#print(y_train.shape)
#print(X_test.shape)

#-----------------------------------------------------------------------------------------------------#


#--------------------------------------Define Graph---------------------------------------------------#
graph=tf.Graph()
with graph.as_default():

    #------------------------------------construct LSTM------------------------------------------#
    # placeholders
    X_p=tf.placeholder(dtype=tf.float32,shape=(None,TIME_STEPS,28),name="input_placeholder")
    y_p=tf.placeholder(dtype=tf.float32,shape=(None,10),name="pred_placeholder")

    # stacked lstm instances: two layers per direction
    lstm_forward_1=rnn.BasicLSTMCell(num_units=HIDDEN_UNITS1)
    lstm_forward_2=rnn.BasicLSTMCell(num_units=HIDDEN_UNITS)
    lstm_forward=rnn.MultiRNNCell(cells=[lstm_forward_1,lstm_forward_2])

    lstm_backward_1 = rnn.BasicLSTMCell(num_units=HIDDEN_UNITS1)
    lstm_backward_2 = rnn.BasicLSTMCell(num_units=HIDDEN_UNITS)
    lstm_backward=rnn.MultiRNNCell(cells=[lstm_backward_1,lstm_backward_2])

    outputs,states=tf.nn.bidirectional_dynamic_rnn(
        cell_fw=lstm_forward,
        cell_bw=lstm_backward,
        inputs=X_p,
        dtype=tf.float32
    )

    outputs_fw=outputs[0]
    outputs_bw = outputs[1]
    # sum the two directions' top-layer outputs at the last time step and use them as logits
    h=outputs_fw[:,-1,:]+outputs_bw[:,-1,:]
    # print(h.shape)
    #----------------------------------------------------------------------------------------------#

    #---------------------------------define loss and optimizer----------------------------------#
    cross_loss=tf.losses.softmax_cross_entropy(onehot_labels=y_p,logits=h)
    #print(cross_loss.shape)

    correct_prediction = tf.equal(tf.argmax(h, 1), tf.argmax(y_p, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    optimizer=tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss=cross_loss)

    init=tf.global_variables_initializer()


#-------------------------------------------Define Session---------------------------------------#
with tf.Session(graph=graph) as sess:
    sess.run(init)
    for epoch in range(1,EPOCH+1):
        #results = np.zeros(shape=(TEST_EXAMPLES, 10))
        train_losses=[]
        accus=[]
        #test_losses=[]
        print("epoch:",epoch)
        for j in range(TRAIN_EXAMPLES//BATCH_SIZE):
            _,train_loss,accu=sess.run(
                fetches=(optimizer,cross_loss,accuracy),
                feed_dict={
                    X_p:X_train[j*BATCH_SIZE:(j+1)*BATCH_SIZE],
                    y_p:y_train[j*BATCH_SIZE:(j+1)*BATCH_SIZE]
                }
            )
            train_losses.append(train_loss)
            accus.append(accu)
        print("average training loss:", sum(train_losses) / len(train_losses))
        print("accuracy:",sum(accus)/len(accus))
