【Tensorflow學習】 RNN
阿新 • • 發佈:2018-12-13
# Build a basic LSTM cell with n_hidden_units hidden units.
cell = tf.contrib.rnn.BasicLSTMCell(n_hidden_units)
# Zero initial state for batch_size samples; an LSTM state has two parts (c_state, h_state).
init_state = cell.zero_state(batch_size, dtype=tf.float32)
# time_major=False: X_in is laid out as (batch, steps, inputs) rather than (steps, batch, inputs).
outputs, final_state = tf.nn.dynamic_rnn(cell, X_in, initial_state=init_state, time_major=False)
outputs 的形狀為【80,28,128】:80 為 batch_size,28 為 steps,128 為 n_hidden_units(隱藏層單元數)
final_state 的形狀為【2,80,128】,包括 (c_n, h_n):其中 c_n 為長期記憶(cell state)【80,128】,h_n 為短期記憶(hidden state)【80,128】,因此 final_state[1] 即為 h_n
# Project the final hidden state to class scores: final_state[1] is h_n, the second element of (c_n, h_n).
results = tf.matmul(final_state[1], weights['out']) + biases['out']
舊的例子:
# View more python learning tutorial on my Youtube and Youku channel!!!

# Youtube video tutorial: https://www.youtube.com/channel/UCdyjiB5H8Pu7aDTNVXTTpcg
# Youku video tutorial: http://i.youku.com/pythontutorial

"""
This code is a modified version of the code from this link:
https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/recurrent_network.py

His code is a very good one for RNN beginners. Feel free to check it out.
"""
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# set random seed for comparing the two result calculations
tf.set_random_seed(1)

# this is data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# hyperparameters
lr = 0.001
training_iters = 100000
batch_size = 80

n_inputs = 28           # MNIST data input (img shape: 28*28)
n_steps = 28            # time steps
n_hidden_units = 128    # neurons in hidden layer
n_classes = 10          # MNIST classes (0-9 digits)

# tf Graph input
x = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_classes])

# Define weights
weights = {
    # (28, 128)
    'in': tf.Variable(tf.random_normal([n_inputs, n_hidden_units])),
    # (128, 10)
    'out': tf.Variable(tf.random_normal([n_hidden_units, n_classes])),
}
biases = {
    # (128, )
    'in': tf.Variable(tf.constant(0.1, shape=[n_hidden_units, ])),
    # (10, )
    'out': tf.Variable(tf.constant(0.1, shape=[n_classes, ])),
}


def RNN(X, weights, biases):
    """Build the LSTM classifier graph and return the class logits.

    Args:
        X: float tensor of shape (batch, n_steps, n_inputs).
        weights: dict with 'in' (n_inputs, n_hidden_units) and
            'out' (n_hidden_units, n_classes) variables.
        biases: dict with 'in' (n_hidden_units,) and 'out' (n_classes,)
            variables.

    Returns:
        Logits tensor of shape (batch, n_classes), computed from the LSTM
        output at the last time step.
    """
    # Read the batch size from the input itself instead of the global
    # `batch_size`, so the graph also accepts batches that are not exactly
    # 80 samples (the placeholders declare the batch dimension as None).
    dynamic_batch_size = tf.shape(X)[0]

    # hidden layer for input to cell
    # flatten the inputs from
    # X (batch, 28 steps, 28 inputs) ==> (batch * 28 steps, 28 inputs)
    X = tf.reshape(X, [-1, n_inputs])

    # into hidden
    # X_in = (batch * 28 steps, 128 hidden)
    X_in = tf.matmul(X, weights['in']) + biases['in']
    # X_in ==> (batch, 28 steps, 128 hidden)
    X_in = tf.reshape(X_in, [-1, n_steps, n_hidden_units])

    # basic LSTM Cell with 128 hidden units.
    cell = tf.contrib.rnn.BasicLSTMCell(n_hidden_units)
    # lstm cell is divided into two parts (c_state, h_state)
    init_state = cell.zero_state(dynamic_batch_size, dtype=tf.float32)

    # There are two choices for the next step:
    # 1: tf.nn.rnn(cell, inputs);
    # 2: tf.nn.dynamic_rnn(cell, inputs).
    # If use option 1, you have to modified the shape of X_in, go and check out this:
    # https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/recurrent_network.py
    # In here, we go for option 2.
    # dynamic_rnn accepts X_in as (batch, steps, inputs) or (steps, batch, inputs);
    # here it is (batch, steps, inputs).
    # time_major says whether steps is the first dimension; it is not here, so False.
    outputs, final_state = tf.nn.dynamic_rnn(
        cell, X_in, initial_state=init_state, time_major=False)

    # With a batch of 80, outputs has shape [80, 28, 128] and final_state has
    # shape [2, 80, 128]: final_state holds (c_state, h_state), the long-term
    # and short-term memory.

    # hidden layer for output as the final results
    # Method 1: final_state[1] is the h_state of shape [batch, 128]:
    #     results = tf.matmul(final_state[1], weights['out']) + biases['out']
    # Method 2 (used here): move steps to the front, split into a list of
    # per-step tensors and take the last step. Indexing the transposed
    # tensor directly (without tf.unstack) also works.
    outputs = tf.unstack(tf.transpose(outputs, [1, 0, 2]))
    results = tf.matmul(outputs[-1], weights['out']) + biases['out']    # shape = (batch, 10)

    return results


pred = RNN(x, weights, biases)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
train_op = tf.train.AdamOptimizer(lr).minimize(cost)

correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    step = 0
    while step * batch_size < training_iters:
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        # MNIST rows are flat 784-vectors; view each one as 28 steps of 28 inputs.
        batch_xs = batch_xs.reshape([batch_size, n_steps, n_inputs])
        sess.run([train_op], feed_dict={
            x: batch_xs,
            y: batch_ys,
        })
        if step % 20 == 0:
            # Accuracy on the current training batch.
            print(sess.run(accuracy, feed_dict={
                x: batch_xs,
                y: batch_ys,
            }))
        step += 1
新的例子:
"""
Know more, visit my Python tutorial page: https://morvanzhou.github.io/tutorials/
My Youtube Channel: https://www.youtube.com/user/MorvanZhou

Dependencies:
tensorflow: 1.1.0
matplotlib
numpy
"""
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt

tf.set_random_seed(1)
np.random.seed(1)

# Hyper Parameters
BATCH_SIZE = 80
TIME_STEP = 28          # rnn time step / image height
INPUT_SIZE = 28         # rnn input size / image width
LR = 0.01               # learning rate

# data
mnist = input_data.read_data_sets('./mnist', one_hot=True)  # they has been normalized to range (0,1)
test_x = mnist.test.images[:2000]
test_y = mnist.test.labels[:2000]
print(mnist.train.images.shape)     # (55000, 28 * 28)
print(mnist.train.labels.shape)     # (55000, 10)

# tensorflow placeholders
tf_x = tf.placeholder(tf.float32, [None, TIME_STEP * INPUT_SIZE])   # shape(batch, 784)
image = tf.reshape(tf_x, [-1, TIME_STEP, INPUT_SIZE])               # (batch, height, width)
tf_y = tf.placeholder(tf.int32, [None, 10])                         # input y

# RNN
rnn_cell = tf.contrib.rnn.BasicLSTMCell(num_units=64)
# The final LSTM state is a (c, h) pair: cell state first, hidden state second.
outputs, (c_n, h_n) = tf.nn.dynamic_rnn(
    rnn_cell,                   # cell you have chosen
    image,                      # input
    initial_state=None,         # the initial hidden state
    dtype=tf.float32,           # must given if set initial_state = None
    time_major=False,           # False: (batch, time step, input); True: (time step, batch, input)
)
output = tf.layers.dense(outputs[:, -1, :], 10)     # output based on the last output step

loss = tf.losses.softmax_cross_entropy(onehot_labels=tf_y, logits=output)   # compute cost
train_op = tf.train.AdamOptimizer(LR).minimize(loss)

# Stateless accuracy of the current evaluation only. The update op of
# tf.metrics.accuracy accumulates total/count across every sess.run call, so
# it would report a running average over all earlier evaluations instead of
# the accuracy of the model as it is now.
correct_prediction = tf.equal(tf.argmax(tf_y, axis=1), tf.argmax(output, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# The context manager releases the session's resources when the script ends.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())     # initialize var in graph

    for step in range(1200):    # training
        b_x, b_y = mnist.train.next_batch(BATCH_SIZE)
        _, loss_ = sess.run([train_op, loss], {tf_x: b_x, tf_y: b_y})
        if step % 50 == 0:      # testing
            accuracy_ = sess.run(accuracy, {tf_x: test_x, tf_y: test_y})
            print('train loss: %.4f' % loss_, '| test accuracy: %.2f' % accuracy_)

    # print 10 predictions from test data
    test_output = sess.run(output, {tf_x: test_x[:10]})
    pred_y = np.argmax(test_output, 1)
    print(pred_y, 'prediction number')
    print(np.argmax(test_y[:10], 1), 'real number')