RNN (LSTM) for Classification
阿新 · Published: 2019-02-05
The full code (TensorFlow 1.x, run in a Jupyter notebook):

```python
import tensorflow as tf
import sys
import random
import numpy as np
from sklearn.cross_validation import train_test_split
from sklearn.cross_validation import StratifiedKFold
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
from scipy import interp
# from tensorflow.contrib.keras.python.keras.layers import BatchNormalization
%matplotlib inline  # Jupyter notebook magic

# hyperparameters
lr = 0.001
training_iters = 10000
batch_size = 200       # 3200/200 = 16 batches for training, 800/200 = 4 for testing
n_inputs = 35          # each time step is a 35-dimensional vector
n_steps = 203          # time steps
n_hidden_units = 100   # neurons in the hidden layer
n_classes = 2

# tf Graph input
x = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_classes])

# Define weights
weights = {
    'in': tf.Variable(tf.random_normal([n_inputs, n_hidden_units])),
    'out': tf.Variable(tf.random_normal([n_hidden_units, n_classes]))
}
biases = {
    'in': tf.Variable(tf.constant(0.1, shape=[n_hidden_units, ])),
    'out': tf.Variable(tf.constant(0.1, shape=[n_classes, ]))
}

# one-hot encode the integer labels (label is assumed loaded; see the stand-in after this listing)
indices = label
depth = 2
on_value = 1
off_value = 0
output = tf.one_hot(indices, depth, on_value, off_value, axis=1)
```
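The listing assumes `X` (the raw sequences) and `label` (integer class labels) are already in memory; the post never defines them. A hypothetical stand-in with the shapes the batch arithmetic above implies (4000 samples of 203 steps × 35 features):

```python
import numpy as np

# hypothetical stand-in for the real dataset; replace with your own loading code
num_samples = 4000  # 3200 train + 800 test, per the batch_size comment above
X = np.random.randn(num_samples, n_steps, n_inputs).astype(np.float32)
label = np.random.randint(0, n_classes, size=num_samples)
```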
`RNN3` projects each 35-dimensional input into the hidden space, runs a dropout-wrapped LSTM over the 203 time steps, and maps the last step's output to the two classes:

```python
def RNN3(X, weights, biases):
    # hidden layer for input to cell
    # X: (batch, n_steps, n_inputs) ==> (batch * n_steps, n_inputs)
    X = tf.reshape(X, [-1, n_inputs])  # -1 lets the function infer that dimension; only one -1 may appear per shape
    # ==> (batch * n_steps, n_hidden_units)
    X_in = tf.matmul(X, weights['in']) + biases['in']
    # ==> (batch, n_steps, n_hidden_units)
    X_in = tf.reshape(X_in, [-1, n_steps, n_hidden_units])

    # cell
    # With state_is_tuple=True the state is a tuple, state = (c, h); with False it is
    # a single tensor with c and h concatenated, state = tf.concat(1, [c, h]).
    if int(tf.__version__.split('.')[1]) < 12 and int(tf.__version__.split('.')[0]) < 1:
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden_units, forget_bias=1.0, state_is_tuple=True)
    else:
        lstm_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden_units)
    dropout_lstm = tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=0.5)
    # the LSTM state is divided into two parts (c_state, m_state)
    _init_state = dropout_lstm.zero_state(batch_size, dtype=tf.float32)
    # run the chosen RNN cell (here an LSTM) over the sequence with the given initial state
    outputs, final_state = tf.nn.dynamic_rnn(dropout_lstm, X_in, initial_state=_init_state, time_major=False)

    # hidden layer for output as the final result
    # results = tf.matmul(final_state[1], weights['out']) + biases['out']
    # or: unpack to a list [(batch, outputs)..] * steps
    if int(tf.__version__.split('.')[1]) < 12 and int(tf.__version__.split('.')[0]) < 1:
        outputs = tf.unpack(tf.transpose(outputs, [1, 0, 2]))  # states is the last output
    else:
        outputs = tf.unstack(tf.transpose(outputs, [1, 0, 2]))
    results = tf.matmul(outputs[-1], weights['out']) + biases['out']  # shape = (batch_size, n_classes)
    print(results.shape)
    return results

pred = RNN3(x, weights, biases)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pred))
train_op = tf.train.AdamOptimizer(lr).minimize(cost)

predict_prob = tf.nn.softmax(pred)  # probability assigned to each predicted label
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))  # True/False per sample
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

if int(tf.__version__.split('.')[1]) < 12 and int(tf.__version__.split('.')[0]) < 1:
    init = tf.initialize_all_variables()
else:
    init = tf.global_variables_initializer()
```
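The transpose/unstack step at the end of `RNN3` only extracts the hidden output of the last time step. A small numpy sketch of the same index manipulation, to make the shapes concrete:

```python
import numpy as np

# toy stand-in for the RNN outputs: (batch=2, steps=3, hidden=4)
outputs = np.arange(24).reshape(2, 3, 4)

# transpose to (steps, batch, hidden), split along the first axis, take the last step
last_step = np.transpose(outputs, (1, 0, 2))[-1]  # shape (2, 4)

# identical to slicing the last step directly
assert np.array_equal(last_step, outputs[:, -1, :])
```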
Training and evaluation run in one session: stratified 5-fold cross-validation, random mini-batches sampled without replacement within each epoch, early stopping on a held-out validation batch, and a per-fold ROC curve:

```python
with tf.Session() as sess:
    labelR = sess.run(output)  # materialise the one-hot labels
    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)
    cv = StratifiedKFold(label, n_folds=5)
    finalRes = []
    for numFold, (train_index, test_index) in enumerate(cv):
        sess.run(init)
        # pad/trim fold indices so each fold has exactly 4/5 train and 1/5 test samples
        if len(train_index) < len(label) * 4 // 5:
            train_index = np.append(train_index, [0])
        if len(test_index) < len(label) // 5:
            test_index = np.append(test_index, [0])
        if len(train_index) > len(label) * 4 // 5:
            train_index = train_index[0:len(label) * 4 // 5]
        if len(test_index) > len(label) // 5:
            test_index = test_index[0:len(label) // 5]
        x_train = [X[i] for i in train_index]
        y_train = [labelR[i] for i in train_index]
        x_test = np.array([X[i] for i in test_index])
        y_test = np.array([labelR[i] for i in test_index])

        epoch = 0        # counts passes over the full training set
        maxAccuracy = 0  # early stopping: quit after 5 consecutive checks without a new best validation accuracy
        failNum = 0      # consecutive checks without improvement
        while epoch < training_iters:
            index = [i for i in range(len(x_train))]
            for step in range(int(len(x_train) / batch_size)):  # run through every batch of the training data
                indexR = random.sample(index, batch_size)  # batch_size = 200
                batch_xs = np.array([x_train[i] for i in indexR]).reshape([batch_size, n_steps, n_inputs])
                batch_ys = np.array([y_train[i] for i in indexR])
                # drop the sampled ids from index so they are not trained twice in this epoch
                for ind in set(indexR):
                    index.remove(ind)
                sess.run([train_op], feed_dict={x: batch_xs, y: batch_ys})
            if epoch % 30 == 0:  # report the current accuracy every 30 epochs
                accur = sess.run(accuracy, feed_dict={x: batch_xs, y: batch_ys})
                print('%s%d%s%f' % ('At ', epoch, 'th accuracy:', accur))
                # one batch of the test fold serves as the validation set
                valiAccur = sess.run(accuracy, feed_dict={x: x_test[0:batch_size].reshape([-1, n_steps, n_inputs]),
                                                          y: y_test[0:batch_size]})
                if valiAccur > maxAccuracy:
                    maxAccuracy = valiAccur
                    failNum = 0
                else:
                    failNum += 1
                costVal = sess.run(cost, feed_dict={x: batch_xs, y: batch_ys})
                print('%s%f' % ('cost:', costVal))
                if failNum >= 5:
                    print('%s%f' % ('Accuracy on validation set:', valiAccur))
                    break
            epoch += 1

        # testing
        x_test = x_test.reshape([-1, n_steps, n_inputs])
        result = []
        prob = []         # predicted probability of the positive class for each sample
        final_label = []
        for i in range(1, 5):
            x_test2 = x_test[batch_size * (i - 1):batch_size * i]
            y_test2 = y_test[batch_size * (i - 1):batch_size * i]
            temp_prob = sess.run(predict_prob, feed_dict={x: x_test2, y: y_test2})
            final_label.extend(sess.run(tf.argmax(y_test2, 1)))
            prob.extend(np.array(temp_prob)[:, 1])
            result.append(sess.run(accuracy, feed_dict={x: x_test2, y: y_test2}))
        fpr, tpr, thresholds = roc_curve(final_label, prob, pos_label=1)
        mean_tpr += interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.6f)' % (numFold, roc_auc))
        print('%d%s%f' % (numFold, "th fold accuracy:", np.mean(result)))
        finalRes.append(np.mean(result))

    print("Testing accuracy:", np.mean(finalRes))
    plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')  # diagonal reference line
    mean_tpr /= len(cv)   # average the TPR values interpolated at the 100 mean_fpr points across folds
    mean_tpr[-1] = 1.0    # force the curve to end at (1, 1)
    mean_auc = auc(mean_fpr, mean_tpr)  # mean AUC over the folds
    # plot the mean ROC curve
    plt.plot(mean_fpr, mean_tpr, 'k--', label='Mean ROC (area = %0.6f)' % mean_auc, lw=2)
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic')
    plt.legend(loc="lower right")
    plt.show()
```
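A side note on the imports: `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed in 0.20. On current versions the same folds come from `sklearn.model_selection`:

```python
from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits=5)
for numFold, (train_index, test_index) in enumerate(skf.split(X, label)):
    pass  # same per-fold handling as above
```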
This post still has a number of shortcomings: real time-series data often differ in length, and the batch at test time is usually not of a fixed size. These will be improved in later revisions; one possible direction is sketched below.
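Both issues can be addressed inside the graph. A minimal sketch, assuming a hypothetical extra `seq_len` input holds the true length of each (padded) sequence; `sequence_length` masks the padded steps, and reading the batch size from the fed tensor removes the hard-coded `batch_size`:

```python
import tensorflow as tf

n_inputs, n_steps, n_hidden_units = 35, 203, 100

x = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
seq_len = tf.placeholder(tf.int32, [None])  # hypothetical extra input: true length of each sequence

cell = tf.contrib.rnn.BasicLSTMCell(n_hidden_units)
batch = tf.shape(x)[0]  # batch size taken from the fed tensor, not hard-coded
init_state = cell.zero_state(batch, dtype=tf.float32)

# with sequence_length set, computation stops at each sequence's true last step,
# and final_state.h holds the hidden state at that step, so it can replace the
# outputs[-1] indexing used in RNN3
outputs, final_state = tf.nn.dynamic_rnn(cell, x, sequence_length=seq_len,
                                         initial_state=init_state, time_major=False)
last_output = final_state.h  # shape (batch, n_hidden_units)
```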
See the follow-up post: http://blog.csdn.net/xwd18280820053/article/details/74298397
The code in this post is adapted from the official examples; if anything here is off, please point it out so we can all improve together! (Updated continually)