PyTorch搭建神經網路模型,並匯入資料進行訓練
阿新 • • 發佈:2020-12-07
技術標籤:《動手學深度學習》 記錄
1.PyTorch搭建神經網路模型的四種方法
參考https://www.cnblogs.com/picassooo/p/12817629.html
方法一:torch.nn.Sequential()
torch.nn.Sequential類是torch.nn中的一種序列容器,引數會按照我們定義好的序列自動傳遞下去。
# Signature reference:
#   nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0,
#             dilation=1, groups=1, bias=True)
# padding=2 keeps the output width/height equal to the input's.
# Output size: (width - kernel_size + 2 * padding) / stride + 1
# Between the conv stages and the fully connected layer, the (32, 7, 7)
# result has to be flattened into 32 * 7 * 7 values per sample.
import torch.nn as nn


class Net(nn.Module):
    """Two conv -> ReLU -> max-pool stages followed by one linear layer.

    The layers inside each ``nn.Sequential`` are unnamed, so they are
    addressed by position (0, 1, 2).
    """

    def __init__(self):
        super().__init__()

        # input shape (1, 28, 28)
        first_stage = [
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5,
                      stride=1, padding=2),   # output shape (16, 28, 28)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),      # output shape (16, 14, 14)
        ]
        self.conv1 = nn.Sequential(*first_stage)

        second_stage = [
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5,
                      stride=1, padding=2),   # output shape (32, 14, 14)
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),      # output shape (32, 7, 7)
        ]
        self.conv2 = nn.Sequential(*second_stage)

        self.linear = nn.Linear(in_features=32 * 7 * 7, out_features=10)

    def forward(self, x):
        """Run both conv stages, flatten per sample, apply the linear layer."""
        features = self.conv2(self.conv1(x))
        # Keep the batch dimension and collapse everything else.
        flattened = features.view(features.size(0), -1)
        return self.linear(flattened)


net = Net()
print(net)
執行結果:
注意:這樣做有一個問題:每一層都沒有自己的名稱,預設是以 0、1、2、3 來編號,從上面的執行結果也可以看出。
方法二:torch.nn.Sequential() 搭配 collections.OrderedDict()
import torch.nn as nn
from collections import OrderedDict  # dict subclass that remembers insertion order


class Net(nn.Module):
    """Same network as before, but every layer in a stage carries a name.

    Each ``nn.Sequential`` is built from an ``OrderedDict`` so the printed
    model shows ``conv1``/``ReLU1``/``pool1`` instead of ``0``/``1``/``2``.
    """

    def __init__(self):
        super().__init__()

        stage_one = OrderedDict()
        stage_one['conv1'] = nn.Conv2d(in_channels=1, out_channels=16,
                                       kernel_size=5, stride=1, padding=2)
        stage_one['ReLU1'] = nn.ReLU()
        stage_one['pool1'] = nn.MaxPool2d(kernel_size=2)
        self.conv1 = nn.Sequential(stage_one)

        stage_two = OrderedDict()
        stage_two['conv2'] = nn.Conv2d(in_channels=16, out_channels=32,
                                       kernel_size=5, stride=1, padding=2)
        stage_two['ReLU2'] = nn.ReLU()
        stage_two['pool2'] = nn.MaxPool2d(kernel_size=2)
        self.conv2 = nn.Sequential(stage_two)

        self.linear = nn.Linear(in_features=32 * 7 * 7, out_features=10)

    def forward(self, x):
        """Run both conv stages, flatten per sample, apply the linear layer."""
        features = self.conv2(self.conv1(x))
        # Keep the batch dimension and collapse everything else.
        flattened = features.view(features.size(0), -1)
        return self.linear(flattened)


net = Net()
print(net)
執行結果:
從上面的結果中可以看出,這個時候每一個層都有了自己的名稱。但需要注意,我們並不能用字串下標的方式取得層:torch.nn.Sequential 的 [] 只接受整數索引(或切片),所以 net.conv1[1] 是正確的,net.conv1['ReLU1'] 是錯誤的。若想按名稱取得某一層,可以改用屬性存取,例如 net.conv1.ReLU1。
方法三:torch.nn.Sequential() 搭配 add_module()
import torch.nn as nn


class Net(nn.Module):
    """Same network again, with named layers registered via ``add_module``.

    Each stage starts as an empty ``nn.Sequential`` and the layers are
    attached one by one under explicit names.
    """

    def __init__(self):
        super().__init__()

        self.conv1 = nn.Sequential()
        stage_one_layers = (
            ('conv1', nn.Conv2d(in_channels=1, out_channels=16,
                                kernel_size=5, stride=1, padding=2)),
            ('ReLU1', nn.ReLU()),
            ('pool1', nn.MaxPool2d(kernel_size=2)),
        )
        for layer_name, layer in stage_one_layers:
            self.conv1.add_module(layer_name, layer)

        self.conv2 = nn.Sequential()
        stage_two_layers = (
            ('conv2', nn.Conv2d(in_channels=16, out_channels=32,
                                kernel_size=5, stride=1, padding=2)),
            ('ReLU2', nn.ReLU()),
            ('pool2', nn.MaxPool2d(kernel_size=2)),
        )
        for layer_name, layer in stage_two_layers:
            self.conv2.add_module(layer_name, layer)

        self.linear = nn.Linear(in_features=32 * 7 * 7, out_features=10)

    def forward(self, x):
        """Run both conv stages, flatten per sample, apply the linear layer."""
        features = self.conv2(self.conv1(x))
        # Keep the batch dimension and collapse everything else.
        flattened = features.view(features.size(0), -1)
        return self.linear(flattened)


net = Net()
print(net)
執行結果:
方法四:只在 __init__() 中定義帶引數的層,啟用函式與池化改在 forward() 中用 torch.nn.functional 呼叫
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    """CNN that keeps only the parameterised layers as attributes.

    ReLU and max-pooling hold no parameters, so they are applied through
    ``torch.nn.functional`` inside ``forward`` instead of being registered
    as sub-modules.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, 5, 1, 2)
        self.conv2 = nn.Conv2d(16, 32, 5, 1, 2)
        self.linear = nn.Linear(32*7*7, 10)

    def forward(self, x):
        """Return the 10 class scores for a batch of (1, 28, 28) images."""
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Flatten (N, 32, 7, 7) to (N, 32*7*7); without this the linear
        # layer receives a 4-D tensor and raises a shape-mismatch error.
        x = x.view(x.size(0), -1)
        output = self.linear(x)
        return output


net = Net()
print(net)
執行結果:
2. 構建loss函式,選擇優化方法
import torch.optim as optim

# `net` is the model built above; `lr` (the initial learning rate) must be
# defined before this snippet runs.
optimizer = optim.Adam(net.parameters(), lr=lr)
# Multiplies the learning rate by 0.5 at milestones 75 and 150; call
# scheduler.step() once per epoch, after that epoch's optimizer updates.
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[75, 150], gamma=0.5)
# Plain module-level names (no `self.`) so they match the bare `optimizer`
# and `criterion` used by the training and test loops.
criterion = nn.CrossEntropyLoss()
3. 匯入資料(以mnist資料為例)
# NOTE(review): `input_data` is presumably the TensorFlow 1.x tutorial helper
# (tensorflow.examples.tutorials.mnist) — confirm the import used here.
mnist = input_data.read_data_sets("data/MNIST_data/", one_hot=False)

# Each split comes back as NumPy arrays with images flattened to 784 values.
X_train, y_train = mnist.train.images, mnist.train.labels  # (55000, 784), (55000,)
X_test, y_test = mnist.test.images, mnist.test.labels  # (10000, 784), (10000,)
X_valid, y_valid = mnist.validation.images, mnist.validation.labels  # (5000, 784), (5000,)

# Fold the validation split into the training set.
train_data_images = np.concatenate((X_train, X_valid), axis=0)  # (60000, 784)
train_data_labels = np.concatenate((y_train, y_valid), axis=0)  # (60000,)
test_data_images = X_test  # (10000, 784)
test_data_labels = y_test  # (10000,)

# Reshape the flat vectors into the (N, channels, height, width) layout
# expected by nn.Conv2d.
train_data_images = np.reshape(train_data_images, (-1, 1, 28, 28))  # (60000, 1, 28, 28)
test_data_images = np.reshape(test_data_images, (-1, 1, 28, 28))  # (10000, 1, 28, 28)
4.訓練
for epoch in range(epochs):
    net.train()
    # Reset the running statistics so every epoch is measured on its own.
    train_loss = 0.0
    train_correct = 0
    total = 0

    # iterate_minibatches() splits train_data_images / train_data_labels into
    # batches; shuffle=True visits the samples in random order.
    for data, target in iterate_minibatches(train_data_images, train_data_labels, train_batch_size, shuffle=True):
        # The batches are NumPy arrays: the network needs float tensors and
        # CrossEntropyLoss needs int64 class indices.
        data = torch.as_tensor(data, dtype=torch.float32)
        target = torch.as_tensor(target, dtype=torch.long)

        optimizer.zero_grad()  # clear gradients left over from the previous step
        output = net(data)
        loss = criterion(output, target)  # compute the loss
        loss.backward()
        optimizer.step()

        train_loss += loss.item()  # sum of the batch losses over one epoch
        # torch.max returns two tensors: the max value of every row and the
        # index of that max value; the index is the predicted class.
        pred = torch.max(output, 1)
        train_correct += (pred[1] == target).sum().item()  # correct predictions so far
        total += target.size(0)

    # Guard against an epoch that produced no batches at all.
    Train_Accuracy = train_correct / total if total else 0.0
    Train_Loss = train_loss
# Reference implementation of iterate_minibatches(), kept commented out.
# It yields (inputs, targets) batches of `batch_size` samples, in random order
# when shuffle=True, followed by one smaller batch holding any leftover
# samples. If len(inputs) < batch_size the loop never runs, start_idx stays
# None, and nothing is yielded.
# NOTE(review): the signature below takes `self`, but the training and test
# loops call iterate_minibatches(...) as a plain function — drop `self` when
# un-commenting it as a module-level helper.
#def iterate_minibatches(self, inputs, targets, batch_size, shuffle=True):
#     assert len(inputs) == len(targets)
#     if shuffle:
#         indices = np.arange(len(inputs))
#         np.random.shuffle(indices)
#
#     start_idx = None
#     for start_idx in range(0, len(inputs) - batch_size + 1, batch_size):
#         if shuffle:
#             excerpt = indices[start_idx:start_idx + batch_size]
#         else:
#             excerpt = slice(start_idx, start_idx + batch_size)
#         yield inputs[excerpt], targets[excerpt]
#
#     if start_idx is not None and start_idx + batch_size < len(inputs):
#         excerpt = indices[start_idx + batch_size:] if shuffle else slice(start_idx + batch_size, len(inputs))
#         yield inputs[excerpt], targets[excerpt]
5. 測試
# Start from fresh counters; reusing the training `total` would dilute the
# test accuracy with the number of training samples.
test_loss = 0.0
test_correct = 0
total = 0

net.eval()
# No gradients are needed for evaluation.
with torch.no_grad():
    # Uses the module-level test arrays prepared in the data-loading step;
    # `test_batch_size` must be defined alongside `train_batch_size`.
    for data, target in iterate_minibatches(test_data_images, test_data_labels, test_batch_size, shuffle=False):
        # The batches are NumPy arrays: the network needs float tensors and
        # CrossEntropyLoss needs int64 class indices.
        data = torch.as_tensor(data, dtype=torch.float32)
        target = torch.as_tensor(target, dtype=torch.long)

        output = net(data)
        loss = criterion(output, target)
        test_loss += loss.item()
        # Second element of torch.max's result is the index of the row
        # maximum, i.e. the predicted class.
        pred = torch.max(output, 1)
        test_correct += (pred[1] == target).sum().item()
        total += target.size(0)

# Guard against an empty test set.
Test_Accuracy = test_correct / total if total else 0.0
Test_Loss = test_loss
6. 儲存模型
# Write the whole `net` object (not only its state_dict) to disk and report
# where the checkpoint went.
model_out_path = "model.pth"
torch.save(obj=net, f=model_out_path)
print(f"Checkpoint saved to {model_out_path}")