A multi-layer LSTM pitfall: how do you define a multi-layer LSTM?
阿新 · Published 2018-11-10
There are two ways to define a multi-layer LSTM. Method 2 below is correct; method 1 raises an error:
A multi-layer LSTM needs a fresh BasicLSTMCell for every layer; do not define one BasicLSTMCell and then reuse it across layers. MultiRNNCell treats every list element as a separate layer, so reusing one instance asks all layers to share a single set of weights even though their inputs have different sizes. (A minimal reproduction of the error follows the function below.)
def lstm_model(X, Y, is_training):
    ### Method 1: wrong!!
    # cell_unit = tf.nn.rnn_cell.BasicLSTMCell(HIDDEN_SIZE)
    # cell = tf.nn.rnn_cell.MultiRNNCell([
    #     cell_unit for _ in range(NUM_LAYERS)])

    ### Method 2: correct
    cell = tf.nn.rnn_cell.MultiRNNCell([
        tf.nn.rnn_cell.BasicLSTMCell(HIDDEN_SIZE)
        for _ in range(NUM_LAYERS)])

    outputs, _ = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
    output = outputs[:, -1, :]  # keep only the output of the last time step
    predictions = tf.contrib.layers.fully_connected(
        output, 1, activation_fn=None)

    # In inference mode, only the predictions are needed.
    if not is_training:
        return predictions, None, None

    loss = tf.losses.mean_squared_error(labels=Y, predictions=predictions)
    train_op = tf.contrib.layers.optimize_loss(
        loss, tf.train.get_global_step(),
        optimizer='Adagrad', learning_rate=0.1)
    return predictions, loss, train_op
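As promised above, here is a minimal sketch reproducing the method-1 error outside the model function. The shapes are illustrative, not from the original post; on TF 1.4+ the reuse typically fails with a ValueError about mismatched variable shapes, because layer 1 builds the shared kernel for 1-dimensional inputs while layer 2 feeds the same cell HIDDEN_SIZE-dimensional inputs.

# Minimal reproduction of the method-1 failure (TF 1.x; shapes illustrative).
import tensorflow as tf

cell_unit = tf.nn.rnn_cell.BasicLSTMCell(30)
# The same object appears twice, so both layers point at one kernel variable.
stacked = tf.nn.rnn_cell.MultiRNNCell([cell_unit, cell_unit])

x = tf.placeholder(tf.float32, [None, 10, 1])  # [batch, time, features]
# Layer 1 builds the kernel for input size 1; layer 2 then calls the same
# cell with 30-dimensional inputs, so the kernel would need two shapes at
# once and TF raises a ValueError instead of silently sharing weights.
outputs, _ = tf.nn.dynamic_rnn(stacked, x, dtype=tf.float32)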
The complete example program:
#-*-coding:utf-8-*-
import numpy as np
import tensorflow as tf
import matplotlib as mpl
mpl.use('Agg')
from matplotlib import pyplot as plt

HIDDEN_SIZE = 30
NUM_LAYERS = 2
TIME_STEPS = 10
TRAINING_STEPS = 10000
BATCH_SIZE = 32
TRAINING_EXAMPLES = 10000
TESTING_EXAMPLES = 1000
SAMPLE_GAP = 0.01

def generate_data(seq):
    # Slice the 1-D sequence into supervised pairs: X holds TIME_STEPS
    # consecutive values, Y holds the value that follows them. The extra
    # list wrap gives X shape [N, 1, TIME_STEPS], i.e. each window is fed
    # to the RNN as a single time step with TIME_STEPS features.
    X = []
    Y = []
    for i in range(len(seq) - TIME_STEPS):
        X.append([seq[i:i + TIME_STEPS]])
        Y.append([seq[i + TIME_STEPS]])
    return np.array(X, dtype=np.float32), np.array(Y, dtype=np.float32)

def lstm_model(X, Y, is_training):
    # Each layer must be a fresh BasicLSTMCell (see the note above).
    cell = tf.nn.rnn_cell.MultiRNNCell([
        tf.nn.rnn_cell.BasicLSTMCell(HIDDEN_SIZE)
        for _ in range(NUM_LAYERS)])

    outputs, _ = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
    output = outputs[:, -1, :]  # keep only the last time step
    predictions = tf.contrib.layers.fully_connected(
        output, 1, activation_fn=None)

    if not is_training:
        return predictions, None, None

    loss = tf.losses.mean_squared_error(labels=Y, predictions=predictions)
    train_op = tf.contrib.layers.optimize_loss(
        loss, tf.train.get_global_step(),
        optimizer='Adagrad', learning_rate=0.1)
    return predictions, loss, train_op

def train(sess, train_X, train_Y):
    # Feed the training data through the Dataset API.
    ds = tf.data.Dataset.from_tensor_slices((train_X, train_Y))
    ds = ds.repeat().shuffle(1000).batch(BATCH_SIZE)
    X, Y = ds.make_one_shot_iterator().get_next()

    # Build the model and obtain the training op.
    with tf.variable_scope('model'):
        predictions, loss, train_op = lstm_model(X, Y, True)

    sess.run(tf.global_variables_initializer())
    for i in range(TRAINING_STEPS):
        _, l = sess.run([train_op, loss])
        if i % 100 == 0:
            print('train step: ' + str(i) + ', loss: ' + str(l))

def run_eval(sess, test_X, test_y):
    # Feed the test data to the graph through the Dataset API,
    # one example at a time.
    ds = tf.data.Dataset.from_tensor_slices((test_X, test_y))
    ds = ds.batch(1)
    X, y = ds.make_one_shot_iterator().get_next()

    # Reuse the trained variables; no real y values are needed for inference.
    with tf.variable_scope("model", reuse=True):
        prediction, _, _ = lstm_model(X, [0.0], False)

    # Collect predictions and labels.
    predictions = []
    labels = []
    for i in range(TESTING_EXAMPLES):
        p, l = sess.run([prediction, y])
        predictions.append(p)
        labels.append(l)

    # Compute RMSE as the evaluation metric.
    predictions = np.array(predictions).squeeze()
    labels = np.array(labels).squeeze()
    rmse = np.sqrt(((predictions - labels) ** 2).mean(axis=0))
    print("Root Mean Square Error is: %f" % rmse)

    # Plot the predicted sine curve against the real one. The Agg backend
    # cannot open a window, so save the figure to a file instead of show().
    plt.figure()
    plt.plot(predictions, label='predictions')
    plt.plot(labels, label='real_sin')
    plt.legend()
    plt.savefig('sin_predictions.png')

# The training interval is [0, test_start] and the test interval follows it,
# so the model is evaluated on a part of the curve it has never seen.
test_start = (TRAINING_EXAMPLES + TIME_STEPS) * SAMPLE_GAP
test_end = test_start + (TESTING_EXAMPLES + TIME_STEPS) * SAMPLE_GAP
train_X, train_Y = generate_data(np.sin(np.linspace(
    0, test_start, TRAINING_EXAMPLES + TIME_STEPS, dtype=np.float32)))
test_X, test_Y = generate_data(np.sin(np.linspace(
    test_start, test_end, TESTING_EXAMPLES + TIME_STEPS, dtype=np.float32)))

with tf.Session() as sess:
    train(sess, train_X, train_Y)
    run_eval(sess, test_X, test_Y)
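A side note, not from the original post: tf.nn.rnn_cell, tf.contrib, and BasicLSTMCell are all gone in TensorFlow 2.x. There, the usual way to express the same stacked architecture is with Keras layers. A minimal sketch, assuming inputs reshaped to [batch, TIME_STEPS, 1]:

# Hypothetical TF 2.x equivalent of the stacked LSTM above (Keras API).
import tensorflow as tf

model = tf.keras.Sequential([
    # return_sequences=True so the second LSTM layer sees the whole sequence.
    tf.keras.layers.LSTM(30, return_sequences=True, input_shape=(10, 1)),
    tf.keras.layers.LSTM(30),   # final layer returns only the last time step
    tf.keras.layers.Dense(1),   # linear head, like the fully_connected layer
])
model.compile(optimizer='adagrad', loss='mse')

Here each layer is necessarily a distinct object, so the method-1 pitfall cannot occur by construction.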