LSTM Explained in Detail, with a C++ Implementation
This post starts with a quick introduction to recurrent neural networks and LSTM, and finishes with an LSTM implemented in C++. I am still learning this myself, but they say the best way to learn something is to learn it and then try to explain it to someone else, so, here we are.
Let's start with an example about what to eat for dinner (see the figure). Suppose you are very set in your habits: what you eat tonight is determined entirely by what you ate yesterday, and the menu repeats on a three-day cycle.
As the figure shows, if you had pizza yesterday you must have sushi today, and if you had sushi yesterday you must have waffles today.
Now suppose you have forgotten what you ate yesterday. How do you work out tonight's dinner? You need to know what you ate the day before, or even earlier, and then step forward one day at a time until you reach today, as shown in the figure below.
That is already a simple RNN at work: by looping, or rather recursing, over the past, you infer what to eat today. The little sketch below spells the idea out.
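To make the unrolling concrete, here is a toy sketch of the dinner cycle (my own illustration, not code from the original figures): the menu is a fixed transition, so if all you remember is a meal from a few days back, you just apply the same rule forward until you reach today.

#include <iostream>
#include <string>

// Fixed three-day cycle: pizza -> sushi -> waffles -> pizza -> ...
std::string next_dinner(const std::string& yesterday)
{
    if (yesterday == "pizza") return "sushi";
    if (yesterday == "sushi") return "waffles";
    return "pizza";                        // waffles was yesterday
}

int main()
{
    std::string dinner = "sushi";          // the last meal you actually remember, 3 days ago
    for (int day = 0; day < 3; ++day)      // roll the same rule forward to today
        dinner = next_dinner(dinner);
    std::cout << "today: " << dinner << "\n";
    return 0;
}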
That was a simple example of an RNN in action. Next, let's look at another example: writing a children's book.
The book's vocabulary is tiny. We sort the words and put them into a vector, giving the figure below. The connections in the figure come from earlier training: if the word just received is Jane, the next word is very likely to be saw or a full stop.
Likewise, remembering the previous prediction raises the probability of getting the next one right, as shown in the figure.
With that we can build a simple RNN model, shown in the figure: the memory together with the new input is passed through a neural network and an activation function to produce the new prediction, roughly as in the sketch below.
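In code, one step of such an RNN is little more than mixing the new input with the previous output through weight matrices and a squashing activation. This is a minimal sketch with made-up shapes and names, not the exact network in the figure:

#include <cmath>
#include <vector>

// One plain RNN step: h_t = tanh(W * x_t + U * h_{t-1}).
// W is hidden-by-input, U is hidden-by-hidden; all values are illustrative.
std::vector<double> rnn_step(const std::vector<double>& x,
                             const std::vector<double>& h_prev,
                             const std::vector<std::vector<double>>& W,
                             const std::vector<std::vector<double>>& U)
{
    std::vector<double> h(h_prev.size(), 0.0);
    for (size_t j = 0; j < h.size(); ++j)
    {
        double sum = 0.0;
        for (size_t m = 0; m < x.size(); ++m)      sum += W[j][m] * x[m];       // new input
        for (size_t m = 0; m < h_prev.size(); ++m) sum += U[j][m] * h_prev[m];  // memory
        h[j] = std::tanh(sum);   // squashed result: the new prediction / new memory
    }
    return h;
}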
If we use a plain RNN, you may notice some mistakes, as in the figure below. That is because this model has only very short-term memory: it can look back one step, but not at anything earlier. To let the model draw on memories further back, we add more machinery, giving the RNN in the figure above. But now the memory is clearly redundant, whereas human memory forgets, selects and ignores as time goes on. This is where LSTM comes in: an LSTM is an RNN with three gates added to process the memory further.
First, we add a forget gate, as shown in the figure.
Next, a selection gate, which filters the results before anything is output. This gate decides which results stay inside the model and which become the final output.
From the figure above we can see that at each step the new prediction is combined with the memory that survived forgetting, and only after selection does the final result come out. How to predict, how to forget and how to select are each learned by their own neural network.
Finally, we add an ignore gate. It discards the parts of the new memory that are not particularly relevant right now, so they do not interfere with later steps, as shown in the figure. The sketch after this paragraph puts the three gates together.
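Combined, a single LSTM unit updates its memory and output roughly as below. The weight names here are placeholders of my own; the full matrix version, using the same sigmoid and tanh helpers, is the C++ program later in this post.

#include <cmath>

// One scalar LSTM unit combining the ignore (input), forget and selection
// (output) gates described above. The weights are placeholders.
struct LstmOut { double state; double h; };

LstmOut lstm_step(double x, double h_prev, double s_prev,
                  double Wi, double Ui,   // ignore/input gate weights
                  double Wf, double Uf,   // forget gate weights
                  double Wo, double Uo,   // selection/output gate weights
                  double Wg, double Ug)   // candidate-memory weights
{
    auto sigmoid = [](double v) { return 1.0 / (1.0 + std::exp(-v)); };
    double i = sigmoid(Wi * x + Ui * h_prev);     // how much of the new memory to let in
    double f = sigmoid(Wf * x + Uf * h_prev);     // how much of the old memory to keep
    double o = sigmoid(Wo * x + Uo * h_prev);     // how much of the result to output
    double g = std::tanh(Wg * x + Ug * h_prev);   // candidate new memory
    double s = f * s_prev + i * g;                // forget, then add the selected new memory
    double h = o * std::tanh(s);                  // select what leaves the cell
    return {s, h};
}

In the full program each of these scalars becomes a vector of hidenode values and each weight a matrix, but the update rule is the same.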
With that, the LSTM model is complete. Now let's walk through how an LSTM runs, using the earlier simple example.
Assume the LSTM has already been trained, i.e. every part of the model is fixed.
The current input is Doug, and the earlier memory holds Jane saw Spot. We now predict the next word.
In the first prediction step, because the previous word is Doug, saw gets a high probability (shown in white) and Doug a low one (shown in black). Ignoring the ignore path for now, this result is multiplied by the recalled memory at the forget gate; assuming there is no memory yet, it then enters the selection gate, which simply picks out saw.
Next we feed this result back in to predict the following word. In the new cycle saw is the new input and also the old prediction. As the figure shows, from saw the model predicts Doug, Jane and Spot as candidates.
Again skipping the ignore gate, we reach the forget gate. This time there is a memory: since the previous prediction was already saw, saw is forgotten and "not Doug" is kept, so after the forget gate only Jane and Spot remain. The selection gate then gives probabilities for Jane and Spot, and the larger one becomes the output.
Thanks to the forget gate, this cycle avoids the mistake Doug saw Doug.
Below is the LSTM C++ source that I modified myself. I corrected it mainly by studying these two diagrams.
/*
  Neal 2018-04-09
  My own modified LSTM
*/
#include "lstm_chk.h"
// activation function (sigmoid)
double sigmoid(double x)
{
    return 1.0 / (1.0 + exp(-x));
}
// derivative of the sigmoid, where y is the sigmoid output
double dsigmoid(double y)
{
    return y * (1.0 - y);
}
// derivative of tanh, where y is the pre-activation input (tanh is applied inside)
double dtanh(double y)
{
    y = tanh(y);
    return 1.0 - y * y;
}
void winit(double w[], int n) // initialize weights
{
    for(int i=0; i<n; i++)
        w[i] = uniform_plus_minus_one; // uniform distribution on (-1, 1)
}
void RNN::chooseVMtoInt(int num)
{
    switch(num){
        case 1: intWeight1(); break;
        case 2: intWeight2(); break;
        case 3: intWeight3(); break;
        case 4: intWeight4(); break;
        case 5: intWeight5(); break;
        case 6: intWeight6(); break;
        case 7: intWeight7(); break;
        case 8: intWeight8(); break;
        case 9: intWeight9(); break;
        case 10: intWeight10(); break;
        case 11: intWeight11(); break;
        case 12: intWeight12(); break;
        case 13: intWeight13(); break;
        case 14: intWeight14(); break;
        case 15: intWeight15(); break;
    }
}
RNN::RNN(int flavor_num, int timestep)
{
    batch_size = Day_Num / timestep; // number of rows of the 2-D arrays
    time_step = timestep;
    thisFlavorNUM = flavor_num;
    reshape(time_step, flavor_num);
    x = new double[innode];
    y = new double[outnode];
    chooseVMtoInt(flavor_num);
    // winit((double*)W_I, innode * hidenode);
    // winit((double*)U_I, hidenode * hidenode);
    // winit((double*)W_F, innode * hidenode);
    // winit((double*)U_F, hidenode * hidenode);
    // winit((double*)W_O, innode * hidenode);
    // winit((double*)U_O, hidenode * hidenode);
    // winit((double*)W_G, innode * hidenode);
    // winit((double*)U_G, hidenode * hidenode);
    // winit((double*)W_out, hidenode * outnode);
}
RNN::~RNN()
{
    delete[] x;
    delete[] y;
    for(int i=0; i<batch_size; i++)
    {
        delete[] matX[i];
        delete[] matY[i];
    }
    delete[] matX;
    delete[] matY;
}
/*
  The x data along the time axis is 2-D; its size is determined by time_step
  together with how much data there is.
*/
void RNN::train(int loop_time, double alpha)
{
    int epoch, i, j, k, m, p, batch_count;
    batch_count = 0;
    // these vectors are cleared after every training pass
    vector<double*> I_vector;  // input gate
    vector<double*> F_vector;  // forget gate
    vector<double*> O_vector;  // output gate
    vector<double*> G_vector;  // candidate (new) memory
    vector<double*> S_vector;  // cell state
    vector<double*> h_vector;  // hidden-layer output
    vector<double>  y_delta;   // derivative of the error w.r.t. the output layer
    for(epoch = 0; epoch < loop_time; epoch++) // training epochs
    {
        double e = 0.0; // accumulated error
        double *predict = new double[time_step]; // predictions for this pass; one set per batch
        memset(predict, 0, sizeof(double)*time_step);
        // at time 0 there is no previous hidden layer, so start from all zeros
        double *S = new double[hidenode]; // cell state
        double *h = new double[hidenode]; // hidden output
        for(i=0; i<hidenode; i++)
        {
            S[i] = 0;
            h[i] = 0;
        }
        S_vector.push_back(S);
        h_vector.push_back(h);
        double *x_batch_one = matX[batch_count];
        double *y_batch_one = matY[batch_count];
        batch_count++;
        if(batch_count >= batch_size)
            batch_count = 0;
        // forward pass
        for(p=0; p<time_step; p++) // iterate over the values in one batch
        {
            x[0] = x_batch_one[p];
            double t = (double)y_batch_one[p]; // target value for this step
            double *in_gate = new double[hidenode];     // input gate
            double *out_gate = new double[hidenode];    // output gate
            double *forget_gate = new double[hidenode]; // forget gate
            double *g_gate = new double[hidenode];      // candidate (new) memory
            double *state = new double[hidenode];       // cell state
            double *h = new double[hidenode];           // hidden-layer output
            for(j=0; j<hidenode; j++)
            {
                // propagate the input layer to the hidden layer
                double inGate = 0.0;
                double outGate = 0.0;
                double forgetGate = 0.0;
                double gGate = 0.0;
                for(m=0; m<innode; m++) // input, output, forget gates and candidate memory
                {
                    inGate += x[m] * W_I[m][j];     // input gate
                    outGate += x[m] * W_O[m][j];    // output gate
                    forgetGate += x[m] * W_F[m][j]; // forget gate
                    gGate += x[m] * W_G[m][j];      // candidate memory
                }
                double *h_pre = h_vector.back();     // output of the previous hidden layer
                double *state_pre = S_vector.back(); // previous cell state
                for(m=0; m<hidenode; m++)
                {
                    inGate += h_pre[m] * U_I[m][j];  // recurrent weights from the previous hidden state
                    outGate += h_pre[m] * U_O[m][j];
                    forgetGate += h_pre[m] * U_F[m][j];
                    gGate += h_pre[m] * U_G[m][j];
                }
                in_gate[j] = sigmoid(inGate);
                out_gate[j] = sigmoid(outGate);
                forget_gate[j] = sigmoid(forgetGate);
                g_gate[j] = tanh(gGate);             // changed from sigmoid(gGate)
                double s_pre = state_pre[j];
                state[j] = forget_gate[j] * s_pre + g_gate[j] * in_gate[j];
                h[j] = out_gate[j] * tanh(state[j]); // changed from in_gate[j] * tanh(state[j])
            }
            // propagate the hidden layer to the output layer
            for(k=0; k<outnode; k++)
            {
                double out = 0.0;
                for(j=0; j<hidenode; j++)
                    out += h[j] * W_out[j][k];
                y[k] = sigmoid(out); // output of each output unit; for a regression problem the sigmoid really should not be applied
            }
            // predict[p] = (int)floor(y[0] + 0.5); // record the prediction (rounded)
            predict[p] = y[0]; // record the prediction
            // save the hidden layer for the next step
            I_vector.push_back(in_gate);
            F_vector.push_back(forget_gate);
            O_vector.push_back(out_gate);
            S_vector.push_back(state);
            G_vector.push_back(g_gate);
            h_vector.push_back(h);
            // store the derivative of the error w.r.t. the output layer; t is the actual value
            y_delta.push_back( (t - y[0]) * dsigmoid(y[0]) );
            e += fabs(t - y[0]); // accumulate the error
        }
        // error backpropagation
        // the hidden-layer delta combines the next time step's hidden-layer error
        // with the current output-layer error
        double h_delta[hidenode];
        // hidden-layer errors coming from the following time step
        double *O_future_delta = new double[hidenode];
        double *I_future_delta = new double[hidenode];
        double *F_future_delta = new double[hidenode];
        double *G_future_delta = new double[hidenode];
        double *state_future_delta = new double[hidenode];
        double *forget_gate_future = new double[hidenode];
        for(j=0; j<hidenode; j++)
        {
            O_future_delta[j] = 0;
            I_future_delta[j] = 0;
            F_future_delta[j] = 0;
            G_future_delta[j] = 0;
            state_future_delta[j] = 0;
            forget_gate_future[j] = 0;
        }
        for(p=time_step-1; p>=0 ; p--)
        {
            x[0] = x_batch_one[p];
            // current hidden layer
            double *in_gate = I_vector[p];     // input gate
            double *out_gate = O_vector[p];    // output gate
            double *forget_gate = F_vector[p]; // forget gate
            double *g_gate = G_vector[p];      // candidate memory
            double *state = S_vector[p+1];     // cell state
            double *h = h_vector[p+1];         // hidden-layer output
            // previous hidden layer
            double *h_pre = h_vector[p];
            double *state_pre = S_vector[p];
            // fresh delta arrays for this time step; they become the "future"
            // deltas of the next (earlier) iteration
            double *O_delta = new double[hidenode];
            double *I_delta = new double[hidenode];
            double *F_delta = new double[hidenode];
            double *G_delta = new double[hidenode];
            double *state_delta = new double[hidenode];
            for(k=0; k<outnode; k++) // for every output unit, update its weights
            {
                // update the hidden-to-output weights
                for(j=0; j<hidenode; j++)
                    W_out[j][k] += alpha * y_delta[p] * h[j];
            }
            // for every hidden unit, compute the error terms and update the weights
            for(j=0; j<hidenode; j++)
            {
                h_delta[j] = 0.0;
                for(k=0; k<outnode; k++)
                {
                    h_delta[j] += y_delta[p] * W_out[j][k];
                }
                for(k=0; k<hidenode; k++)
                {
                    h_delta[j] += I_future_delta[k] * U_I[j][k];
                    h_delta[j] += F_future_delta[k] * U_F[j][k];
                    h_delta[j] += O_future_delta[k] * U_O[j][k];
                    h_delta[j] += G_future_delta[k] * U_G[j][k];
                }
                // correction errors for the hidden layer
                O_delta[j] = h_delta[j] * tanh(state[j]) * dsigmoid(out_gate[j]);
                state_delta[j] = h_delta[j] * out_gate[j] * dtanh(state[j]) +
                                 state_future_delta[j] * forget_gate_future[j];
                F_delta[j] = state_delta[j] * state_pre[j] * dsigmoid(forget_gate[j]);
                I_delta[j] = state_delta[j] * g_gate[j] * dsigmoid(in_gate[j]);
                G_delta[j] = state_delta[j] * in_gate[j] * (1.0 - g_gate[j] * g_gate[j]); // g_gate already stores tanh(gGate)
                // update the recurrent weights between the previous and current hidden layers
                for(k=0; k<hidenode; k++)
                {
                    U_I[k][j] += alpha * I_delta[j] * h_pre[k];
                    U_F[k][j] += alpha * F_delta[j] * h_pre[k];
                    U_O[k][j] += alpha * O_delta[j] * h_pre[k];
                    U_G[k][j] += alpha * G_delta[j] * h_pre[k];
                }
                // update the input-to-hidden weights
                for(k=0; k<innode; k++)
                {
                    W_I[k][j] += alpha * I_delta[j] * x[k];
                    W_F[k][j] += alpha * F_delta[j] * x[k];
                    W_O[k][j] += alpha * O_delta[j] * x[k];
                    W_G[k][j] += alpha * G_delta[j] * x[k];
                }
            }
            // free last iteration's "future" deltas, then point them at this step's deltas
            if(p == time_step-1)
                delete[] forget_gate_future; // the all-zero array created before the loop
            delete[] O_future_delta;
            delete[] F_future_delta;
            delete[] I_future_delta;
            delete[] G_future_delta;
            delete[] state_future_delta;
            O_future_delta = O_delta;
            F_future_delta = F_delta;
            I_future_delta = I_delta;
            G_future_delta = G_delta;
            state_future_delta = state_delta;
            forget_gate_future = forget_gate; // owned by F_vector, not freed here
        }
        delete[] O_future_delta;
        delete[] F_future_delta;
        delete[] I_future_delta;
        delete[] G_future_delta;
        delete[] state_future_delta;
        if(epoch % 1000 == 0 && showLog)
        {
            cout << "error:" << e << endl;
            cout << "x:\t" ;
            for(int l=0; l<batch_size; l++)
            {
                for(k=0; k<time_step; k++)
                    cout << matX[l][k]<<"\t";
            }
            cout << "\nreal:\t" ;
            for(int l=0; l<batch_size; l++)
            {
                for(k=0; k<time_step; k++)
                    cout << setprecision(3) << matY[l][k]<<"\t";
            }
            cout << "\npred:\t" ;
            for(k=0; k<time_step; k++)
                cout << setprecision(3) << predict[k]<<"\t";
            cout << endl;
        }
        delete[] predict;
        for(i=0; i<(int)I_vector.size(); i++)
            delete[] I_vector[i];
        for(i=0; i<(int)F_vector.size(); i++)
            delete[] F_vector[i];
        for(i=0; i<(int)O_vector.size(); i++)
            delete[] O_vector[i];
        for(i=0; i<(int)G_vector.size(); i++)
            delete[] G_vector[i];
        for(i=0; i<(int)S_vector.size(); i++)
            delete[] S_vector[i];
        for(i=0; i<(int)h_vector.size(); i++)
            delete[] h_vector[i];
        I_vector.clear();
        F_vector.clear();
        O_vector.clear();
        G_vector.clear();
        S_vector.clear();
        h_vector.clear();
        y_delta.clear();
    } // end of the epoch loop
    if(writeFlag)
        writeData(thisFlavorNUM);
}
int RNN::rnn_predict(int timelength)
{
    // these vectors are cleared at the end of the call
    vector<double*> I_vector;  // input gate
    vector<double*> F_vector;  // forget gate
    vector<double*> O_vector;  // output gate
    vector<double*> G_vector;  // candidate (new) memory
    vector<double*> S_vector;  // cell state
    vector<double*> h_vector;  // hidden-layer output
    // at time 0 there is no previous hidden layer, so start from all zeros
    double *S = new double[hidenode]; // cell state
    double *h = new double[hidenode]; // hidden output
    for(int i = 0; i < hidenode; i++)
    {
        S[i] = 0;
        h[i] = 0;
    }
    S_vector.push_back(S);
    h_vector.push_back(h);
    double *predict = new double[time_step]; // predictions; one set per batch
    memset(predict, 0, sizeof(double)*time_step);
    double *x_batch_one = new double[time_step];
    for(int i = 1; i<=time_step; i++)
    {
        x_batch_one[i - 1] = Day_Num + i;
        x_batch_one[i - 1] = (x_batch_one[i - 1] - minX)/(maxX - minX) * XDataRate - 1;
    }
    // forward pass
    for(int p = 0; p < time_step; p++) // iterate over the values in one batch
    {
        x[0] = x_batch_one[p];
        double *in_gate = new double[hidenode];     // input gate
        double *out_gate = new double[hidenode];    // output gate
        double *forget_gate = new double[hidenode]; // forget gate
        double *g_gate = new double[hidenode];      // candidate (new) memory
        double *state = new double[hidenode];       // cell state
        double *h = new double[hidenode];           // hidden-layer output
        for(int j = 0; j < hidenode; j++)
        {
            // propagate the input layer to the hidden layer
            double inGate = 0.0;
            double outGate = 0.0;
            double forgetGate = 0.0;
            double gGate = 0.0;
            for(int m = 0; m < innode; m++) // input, output, forget gates and candidate memory
            {
                inGate += x[m] * W_I[m][j];     // input gate
                outGate += x[m] * W_O[m][j];    // output gate
                forgetGate += x[m] * W_F[m][j]; // forget gate
                gGate += x[m] * W_G[m][j];      // candidate memory
            }
            double *h_pre = h_vector.back();     // output of the previous hidden layer
            double *state_pre = S_vector.back(); // previous cell state
            for(int m = 0; m < hidenode; m++)
            {
                inGate += h_pre[m] * U_I[m][j];  // recurrent weights from the previous hidden state
                outGate += h_pre[m] * U_O[m][j];
                forgetGate += h_pre[m] * U_F[m][j];
                gGate += h_pre[m] * U_G[m][j];
            }
            in_gate[j] = sigmoid(inGate);
            out_gate[j] = sigmoid(outGate);
            forget_gate[j] = sigmoid(forgetGate);
            g_gate[j] = tanh(gGate);
            double s_pre = state_pre[j];
            state[j] = forget_gate[j] * s_pre + g_gate[j] * in_gate[j];
            h[j] = out_gate[j] * tanh(state[j]);
        }
        // propagate the hidden layer to the output layer
        for(int k = 0; k < outnode; k++)
        {
            double out = 0.0;
            for(int j = 0; j < hidenode; j++)
            {
                out += h[j] * W_out[j][k];
            }
            y[k] = sigmoid(out); // output of each output unit; for a regression problem the sigmoid really should not be applied
        }
        // predict[p] = (int)floor(y[0] + 0.5); // record the prediction (rounded)
        predict[p] = y[0]; // record the prediction
        cout<<predict[p]<<"\t";
        // save the hidden layer for the next step
        I_vector.push_back(in_gate);
        F_vector.push_back(forget_gate);
        O_vector.push_back(out_gate);
        S_vector.push_back(state);
        G_vector.push_back(g_gate);
        h_vector.push_back(h);
    }
cout<<"\n";
int i;
for(i=0; i<(int)I_vector.size(); i++)
delete I_vector[i];
for(i=0; i<(int)F_vector.size(); i++)
delete F_vector[i];
for(i=0; i<(int)O_vector.size(); i++)
delete O_vector[i];
for(i=0; i<(int)G_vector.size(); i++)
delete G_vector[i];
for(i=0; i<(int)S_vector.size(); i++)
delete S_vector[i];
for(i=0; i<(int)h_vector.size(); i++)
delete h_vector[i];
I_vector.clear();
F_vector.clear();
O_vector.clear();
G_vector.clear();
S_vector.clear();
h_vector.clear();
double temp = predict[timelength - 1] / YDataRate * (maxY - minY) + minY;
int temp2 = (int)floor(temp + 0.5) - (int)maxY;
if(temp2 < 0)
{
cout<<"VM"<<thisFlavorNUM<<" predict error!!! "<<temp2<<endl;
temp2 = 1;
}
else
{
cout<<"VM"<<thisFlavorNUM<<" predict:"<<temp2<<endl;
}
delete predict;
delete x_batch_one;
return temp2;
}
void RNN::reshape(int timestep, int flavor_num) // timestep and flavor both count from 1; also normalises the data
{
    int start = Day_Num % timestep; // index in the 1-D data where we start taking values
    int day = start + 1;
    maxY = VM_dwz[Day_Num - 1][flavor_num - 1];
    minY = VM_dwz[start][flavor_num - 1];
    maxX = Day_Num;
    minX = day;
    matY = new double *[batch_size];
    matX = new double *[batch_size];
    for(int i=0; i<batch_size; i++)
    {
        matX[i] = new double [timestep];
        matY[i] = new double [timestep];
    }
    for(int i=0; i<batch_size; i++)
    {
        for(int j=0; j<timestep; j++)
        {
            matY[i][j] = VM_dwz[start + i*timestep + j][flavor_num - 1];
            matY[i][j] = (matY[i][j] - minY)/(maxY - minY) * YDataRate;
            matX[i][j] = (day - minX)/(maxX - minX) * XDataRate - 1; // scale to [-1, 1]
            day ++;
        }
    }
}
void RNN::writeData(int num)
{
    // ifstream infile; // input stream
    ofstream outfile; // output stream
    stringstream ss;
    ss<<"data"<<num<<".cpp";
    outfile.open(ss.str(), ios::out);
    if(!outfile.is_open ())
        cout << "Open file failure" << endl;
    outfile//<<"#include \"data.h\""<<endl
        <<"#include \"lstm_chk.h\""<<endl
        <<"void RNN::intWeight"<<num<<"()"<<endl
        <<"{"<<endl;
    for(int i=0; i<innode; i++)
        for(int j=0; j<hidenode; j++)
        {
            outfile<<"\tW_I["<<i<<"]["<<j<<"] = "<<W_I[i][j]<<";"<<endl;
            outfile<<"\tW_F["<<i<<"]["<<j<<"] = "<<W_F[i][j]<<";"<<endl;
            outfile<<"\tW_O["<<i<<"]["<<j<<"] = "<<W_O[i][j]<<";"<<endl;
            outfile<<"\tW_G["<<i<<"]["<<j<<"] = "<<W_G[i][j]<<";"<<endl;
        }
    for(int i=0; i<hidenode; i++)
        for(int j=0; j<hidenode; j++)
        {
            outfile<<"\tU_I["<<i<<"]["<<j<<"] = "<<U_I[i][j]<<";"<<endl;
            outfile<<"\tU_F["<<i<<"]["<<j<<"] = "<<U_F[i][j]<<";"<<endl;
            outfile<<"\tU_O["<<i<<"]["<<j<<"] = "<<U_O[i][j]<<";"<<endl;
            outfile<<"\tU_G["<<i<<"]["<<j<<"] = "<<U_G[i][j]<<";"<<endl;
        }
    for(int i=0; i<hidenode; i++)
        for(int j=0; j<outnode; j++)
        {
            outfile<<"\tW_out["<<i<<"]["<<j<<"] = "<<W_out[i][j]<<";"<<endl;
        }
    outfile<<"}"<<endl;
    outfile.close();
}
Header file:
#ifndef _LSTM_CHK_H
#define _LSTM_CHK_H
#include "include.h"
#define innode 1    // number of input nodes
#define hidenode 20 // number of hidden nodes
#define outnode 1   // number of output nodes: one predicted number
#define YDataRate 0.7
#define XDataRate 2
#define showLog 0
#define writeFlag 0 // whether to write the learned weights out to a file
#define randval(high) ( (double)rand() / RAND_MAX * high )
#define uniform_plus_minus_one ( (double)( 2.0 * rand() ) / ((double)RAND_MAX + 1.0) - 1.0 ) // uniform distribution on (-1, 1)
#define nan uniform_plus_minus_one
class RNN
{
public:
    RNN(int flavor_num, int timestep);
    virtual ~RNN();
    void train(int loop_time, double alpha);
    void reshape(int timestep, int flavor_num);
    int rnn_predict(int timelength);
    void writeData(int num);
    void intWeight1();
    void intWeight2();
    void intWeight3();
    void intWeight4();
    void intWeight5();
    void intWeight6();
    void intWeight7();
    void intWeight8();
    void intWeight9();
    void intWeight10();
    void intWeight11();
    void intWeight12();
    void intWeight13();
    void intWeight14();
    void intWeight15();
    void chooseVMtoInt(int num);
private:
    double W_I[innode][hidenode];    // weights from the input to the input gate
    double U_I[hidenode][hidenode];  // recurrent weights from the previous hidden output to the input gate
    double W_F[innode][hidenode];    // weights from the input to the forget gate
    double U_F[hidenode][hidenode];  // recurrent weights from the previous hidden output to the forget gate
    double W_O[innode][hidenode];    // weights from the input to the output gate
    double U_O[hidenode][hidenode];  // recurrent weights from the previous hidden output to the output gate
    double W_G[innode][hidenode];    // weights from the input to the candidate memory
    double U_G[hidenode][hidenode];  // recurrent weights from the previous hidden output to the candidate memory
    double W_out[hidenode][outnode]; // weights from the hidden layer to the output layer
    double *x;  // layer 0 output, set directly from the input vector
    //double *layer_1; // layer 1 output
    double *y;  // layer 2 (output layer) values
    int time_step;  // how many previous values the current one depends on; number of columns in the training data
    int batch_size; // number of rows in the training data
    int data_start;
    int thisFlavorNUM;
    double **matX;  // reshaped training inputs
    double **matY;  // reshaped training targets
    double maxY;
    double minY;
    double maxX;
    double minX;
};
#endif // _LSTM_CHK_H
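For reference, a driver for this class might look like the sketch below. include.h, VM_dwz and Day_Num belong to the rest of the project and are not shown in this post, so the constructor arguments and hyperparameters here are only guesses at how the class is meant to be called.

#include <cstdlib>
#include <iostream>
#include "lstm_chk.h"

int main()
{
    srand(2018);                      // only matters if winit() is re-enabled
    RNN net(1, 7);                    // flavor 1, time_step of 7 days (illustrative)
    net.train(10000, 0.1);            // 10000 epochs, learning rate 0.1 (illustrative)
    int demand = net.rnn_predict(7);  // prediction for the 7th day ahead
    std::cout << "predicted demand: " << demand << std::endl;
    return 0;
}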
The explanation above comes from a YouTube video, an open lecture by an American professor; you will need certain small tools to get over there and watch it, you know what I mean.