PyTorch + LSTM
LSTM schematic diagram (image source)
How the LSTM works (for more details, see: a detailed introduction to LSTM):
class torch.nn.LSTMCell(input_size, hidden_size, bias=True)
In PyTorch, defining an LSTM cell only requires two arguments: input_size and hidden_size.
Parameters:
input_size: the dimensionality of the input features
hidden_size: the dimensionality of the hidden state (hidden_state and cell_state have the same dimensionality)
num_layers: the number of stacked LSTM layers; note that this is an argument of nn.LSTM, not of nn.LSTMCell (see the short sketch after this list)
bias: defaults to True; if True, bias terms are added in the input-to-hidden and hidden-to-hidden transformations, otherwise they are omitted
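As a quick illustration (a minimal sketch; the sizes 10 and 20 and the layer count 2 are arbitrary), nn.LSTMCell takes only input_size and hidden_size, while the layer count belongs to the full nn.LSTM module:
>>> import torch
>>> import torch.nn as nn
>>> cell = nn.LSTMCell(10, 20)             # input_size=10, hidden_size=20, bias defaults to True
>>> lstm = nn.LSTM(10, 20, num_layers=2)   # the stacked-layer count is an nn.LSTM argument, not an LSTMCell one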
Usage:
Inputs: input, (h_0, c_0)
input: shape=[batch_size, input_size], a tensor containing the input features
h_0: shape=[batch_size, hidden_size], a tensor containing the initial hidden state; each element in the batch has its own hidden state
c_0: shape=[batch_size, hidden_size], a tensor containing the initial cell state; each element in the batch has its own cell state
Note: if (h_0, c_0) is not given, both default to zeros.
Outputs: h_1, c_1
h_1: shape=[batch_size, hidden_size], a tensor containing the next hidden state; each element in the batch has its own hidden state
c_1: shape=[batch_size, hidden_size], a tensor containing the next cell state; each element in the batch has its own cell state (a minimal shape check follows below)
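A minimal shape check (a sketch; input_size=10, hidden_size=20 and batch_size=3 are arbitrary values chosen to match the example further down):
>>> cell = nn.LSTMCell(10, 20)
>>> x = torch.randn(3, 10)                           # [batch_size, input_size]
>>> h0, c0 = torch.zeros(3, 20), torch.zeros(3, 20)  # [batch_size, hidden_size]
>>> h1, c1 = cell(x, (h0, c0))
>>> h1.shape, c1.shape
(torch.Size([3, 20]), torch.Size([3, 20]))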
The LSTM's internal (learnable) variables:
weight_ih: the input-to-hidden weights, shape=[4*hidden_size, input_size]
bias_ih: the input-to-hidden bias, shape=[4*hidden_size]
weight_hh: the hidden-to-hidden weights, shape=[4*hidden_size, hidden_size]
bias_hh: the hidden-to-hidden bias, shape=[4*hidden_size] (these shapes are verified in the sketch below)
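The shapes can be confirmed directly on an LSTMCell instance (a small sketch, again with input_size=10 and hidden_size=20, so 4*hidden_size = 80):
>>> cell = nn.LSTMCell(10, 20)
>>> cell.weight_ih.shape, cell.weight_hh.shape
(torch.Size([80, 10]), torch.Size([80, 20]))
>>> cell.bias_ih.shape, cell.bias_hh.shape
(torch.Size([80]), torch.Size([80]))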
Example:
>>> rnn = nn.LSTMCell(10, 20)
>>> input = torch.randn(6, 3, 10)
>>> hx = torch.randn(3, 20)
>>> cx = torch.randn(3, 20)
>>> output = []
>>> for i in range(6):
...     hx, cx = rnn(input[i], (hx, cx))
...     output.append(hx)
Example walkthrough:
input_size = 10
hidden_size = 20
batch_size = 3
number of time steps (sequence length) = 6
When calling the LSTMCell object rnn, (hx, cx) is passed in, i.e. (h_0, c_0) is given explicitly; in practice it can also be omitted, depending on your needs. A variant that relies on the zero default is sketched below.
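For completeness, a variant of the loop that relies on the zero default (a minimal sketch; the state argument of LSTMCell is optional, and when it is omitted both h and c start from zeros, after which the state must still be threaded through the remaining steps):
>>> rnn = nn.LSTMCell(10, 20)
>>> input = torch.randn(6, 3, 10)
>>> output = []
>>> hx, cx = rnn(input[0])                 # no (h_0, c_0) given: both default to zeros
>>> output.append(hx)
>>> for i in range(1, 6):
...     hx, cx = rnn(input[i], (hx, cx))
...     output.append(hx)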
Question: what is the difference between c_1 and h_1?
Answer: see the fifth and sixth LSTM equations (reproduced below). The cell state is used to carry information across time steps and is not the real output; the hidden state is the result of gating the activated cell state and is the useful, output information. In other words, the LSTM produces both hidden_state and cell_state, but hidden_state is the one we actually need and use in applications.
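For reference, the standard LSTM cell equations (the formulation used in the PyTorch documentation); the fifth and sixth lines are the cell-state update and hidden-state output referred to in the answer above:

\begin{aligned}
i_t &= \sigma(W_{ii} x_t + b_{ii} + W_{hi} h_{t-1} + b_{hi}) \\
f_t &= \sigma(W_{if} x_t + b_{if} + W_{hf} h_{t-1} + b_{hf}) \\
g_t &= \tanh(W_{ig} x_t + b_{ig} + W_{hg} h_{t-1} + b_{hg}) \\
o_t &= \sigma(W_{io} x_t + b_{io} + W_{ho} h_{t-1} + b_{ho}) \\
c_t &= f_t \odot c_{t-1} + i_t \odot g_t \\
h_t &= o_t \odot \tanh(c_t)
\end{aligned}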
ConvLSTM code:
# -*- coding: utf-8 -*-
import torch
import torch.nn as nn
from torch.autograd import Variable


class ConvLSTMCell(nn.Module):
    def __init__(self, input_channels, hidden_channels, kernel_size, bias=True):
        super(ConvLSTMCell, self).__init__()
        assert hidden_channels % 2 == 0
        self.input_channels = input_channels
        self.hidden_channels = hidden_channels
        self.bias = bias
        self.kernel_size = kernel_size
        self.num_features = 4
        self.padding = int((kernel_size - 1) / 2)
        # input gate (input and hidden transforms)
        self.Wxi = nn.Conv2d(self.input_channels, self.hidden_channels, self.kernel_size, 1, self.padding, bias=True)
        self.Whi = nn.Conv2d(self.hidden_channels, self.hidden_channels, self.kernel_size, 1, self.padding, bias=False)
        # forget gate
        self.Wxf = nn.Conv2d(self.input_channels, self.hidden_channels, self.kernel_size, 1, self.padding, bias=True)
        self.Whf = nn.Conv2d(self.hidden_channels, self.hidden_channels, self.kernel_size, 1, self.padding, bias=False)
        # memory (cell candidate)
        self.Wxc = nn.Conv2d(self.input_channels, self.hidden_channels, self.kernel_size, 1, self.padding, bias=True)
        self.Whc = nn.Conv2d(self.hidden_channels, self.hidden_channels, self.kernel_size, 1, self.padding, bias=False)
        # output gate
        self.Wxo = nn.Conv2d(self.input_channels, self.hidden_channels, self.kernel_size, 1, self.padding, bias=True)
        self.Who = nn.Conv2d(self.hidden_channels, self.hidden_channels, self.kernel_size, 1, self.padding, bias=False)
        # peephole weights, created (as zeros) in init_hidden
        self.Wci = None
        self.Wcf = None
        self.Wco = None

    def forward(self, x, h, c):
        # input gate: ci = sigmoid(Wxi*x + Whi*h + Wci o c)
        # (three parts: the new input x, the previous cell's output h,
        #  and the previous cell state c via the peephole weight)
        # the peephole weights on the cell state default to 0
        ci = torch.sigmoid(self.Wxi(x) + self.Whi(h) + c * self.Wci)
        # forget gate: cf = sigmoid(Wxf*x + Whf*h + Wcf o c)
        cf = torch.sigmoid(self.Wxf(x) + self.Whf(h) + c * self.Wcf)
        # new cell state: cc = cf o c + ci o tanh(Wxc*x + Whc*h)
        cc = cf * c + ci * torch.tanh(self.Wxc(x) + self.Whc(h))
        # output gate: co = sigmoid(Wxo*x + Who*h + Wco o cc)
        co = torch.sigmoid(self.Wxo(x) + self.Who(h) + cc * self.Wco)
        # new hidden state: ch = co o tanh(cc)
        ch = co * torch.tanh(cc)
        return ch, cc

    def init_hidden(self, batch_size, hidden, shape):
        self.Wci = Variable(torch.zeros(1, hidden, shape[0], shape[1])).cuda()
        self.Wcf = Variable(torch.zeros(1, hidden, shape[0], shape[1])).cuda()
        self.Wco = Variable(torch.zeros(1, hidden, shape[0], shape[1])).cuda()
        return (Variable(torch.zeros(batch_size, hidden, shape[0], shape[1])).cuda(),
                Variable(torch.zeros(batch_size, hidden, shape[0], shape[1])).cuda())
class ConvLSTM(nn.Module):
    # input_channels corresponds to the first input feature map
    # hidden state is a list of succeeding lstm layers.
    def __init__(self, input_channels, hidden_channels, kernel_size, step=1, effective_step=[1], bias=True):
        super(ConvLSTM, self).__init__()
        self.input_channels = [input_channels] + hidden_channels
        self.hidden_channels = hidden_channels
        self.kernel_size = kernel_size
        self.num_layers = len(hidden_channels)
        self.step = step
        self.bias = bias
        self.effective_step = effective_step
        self._all_layers = []
        for i in range(self.num_layers):
            name = 'cell{}'.format(i)
            cell = ConvLSTMCell(self.input_channels[i], self.hidden_channels[i], self.kernel_size, self.bias)
            setattr(self, name, cell)
            self._all_layers.append(cell)

    def forward(self, input):
        internal_state = []
        outputs = []
        for step in range(self.step):
            x = input
            for i in range(self.num_layers):
                # all cells are initialized in the first step
                name = 'cell{}'.format(i)
                if step == 0:
                    bsize, _, height, width = x.size()
                    (h, c) = getattr(self, name).init_hidden(batch_size=bsize, hidden=self.hidden_channels[i], shape=(height, width))
                    internal_state.append((h, c))
                # do forward
                (h, c) = internal_state[i]
                x, new_c = getattr(self, name)(x, h, c)
                internal_state[i] = (x, new_c)
            # only record effective steps
            if step in self.effective_step:
                outputs.append(x)
        return outputs, (x, new_c)
if __name__ == '__main__':
    # gradient check
    convlstm = ConvLSTM(input_channels=512, hidden_channels=[128, 64, 64, 32, 32], kernel_size=3, step=5, effective_step=[4]).cuda()
    loss_fn = torch.nn.MSELoss()
    input = Variable(torch.randn(1, 512, 64, 32)).cuda()
    target = Variable(torch.randn(1, 32, 64, 32)).double().cuda()
    output = convlstm(input)
    output = output[0][0].double()
    res = torch.autograd.gradcheck(loss_fn, (output, target), eps=1e-6, raise_exception=True)
    print(res)
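As a usage note, here is a smaller run of the model above with a check of the output shapes (a sketch with arbitrary, hypothetical sizes; it assumes a CUDA device is available, since init_hidden hard-codes .cuda()):

# hypothetical sizes: 8 input channels, two ConvLSTM layers with 16 and 4 hidden channels
convlstm = ConvLSTM(input_channels=8, hidden_channels=[16, 4], kernel_size=3, step=3, effective_step=[2]).cuda()
x = torch.randn(1, 8, 32, 32).cuda()
outputs, (last_h, last_c) = convlstm(x)
print(outputs[0].shape)   # torch.Size([1, 4, 32, 32]): last layer's hidden state at the effective step
print(last_c.shape)       # torch.Size([1, 4, 32, 32]): last layer's cell state after the final step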