1. 程式人生 > 其它 >PyTorch搭建神經網路模型,並匯入資料進行訓練

PyTorch搭建神經網路模型,並匯入資料進行訓練

技術標籤:《動手學深度學習》 記錄

1.PyTorch搭建神經網路模型的四種方法

參考https://www.cnblogs.com/picassooo/p/12817629.html

方法一:torch.nn.Sequential()

torch.nn.Sequential類是torch.nn中的一種序列容器,引數會按照我們定義好的序列自動傳遞下去。

# nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True)
# padding=2, 使輸出的維度不變
# 計算輸出維度 (width - kernel_size + 2*padding)/stride + 1
# 從卷積層 到 全連線層,需要把每個樣本的(32,7,7)結果展平成長度為32*7*7的向量,即整個批次的形狀為(batch_size, 32*7*7)

import torch.nn as nn
class Net(nn.Module):
    """Small CNN for 1x28x28 inputs whose stages are unnamed ``nn.Sequential`` containers.

    Each stage is Conv2d(kernel 5, stride 1, padding 2) -> ReLU -> MaxPool2d(2).
    The padding keeps the spatial size through the convolution and every
    pooling step halves it (28 -> 14 -> 7).  A final linear layer maps the
    flattened 32*7*7 features to 10 outputs.  Because the layers are passed
    positionally, the children of each stage are named "0", "1", "2".
    """

    def __init__(self):
        super().__init__()

        # Stage 1: (1, 28, 28) -> conv -> (16, 28, 28) -> pool -> (16, 14, 14)
        stage_one = [
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        ]
        self.conv1 = nn.Sequential(*stage_one)

        # Stage 2: (16, 14, 14) -> conv -> (32, 14, 14) -> pool -> (32, 7, 7)
        stage_two = [
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        ]
        self.conv2 = nn.Sequential(*stage_two)

        # Classifier head over the flattened feature map.
        self.linear = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        """Run both conv stages, flatten per sample, and return the linear layer's output."""
        features = self.conv2(self.conv1(x))
        batch_size = features.size(0)
        flattened = features.view(batch_size, -1)  # (N, 32, 7, 7) -> (N, 32*7*7)
        return self.linear(flattened)


net = Net()
print(net)

執行結果:

注意:這樣做有一個問題,每一個層是沒有名稱,預設的是以0、1、2、3來命名,從上面的執行結果也可以看出。

方法二:torch.nn.Sequential() 搭配 collections.OrderedDict()

import torch.nn as nn
from collections import OrderedDict   # OrderedDict是字典的子類,可以記住元素的新增順序
class Net(nn.Module):
    """Small CNN for 1x28x28 inputs whose ``nn.Sequential`` stages carry named layers.

    The layers of each stage are supplied through an ``OrderedDict``, so the
    children are registered under the given keys ("conv1", "ReLU1", "pool1",
    ...) instead of the default "0", "1", "2".  The architecture itself is
    two Conv2d -> ReLU -> MaxPool2d stages followed by one linear layer.
    """

    def __init__(self):
        super().__init__()

        # Stage 1, layers registered in insertion order under explicit names.
        stage_one = OrderedDict()
        stage_one['conv1'] = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=2)
        stage_one['ReLU1'] = nn.ReLU()
        stage_one['pool1'] = nn.MaxPool2d(kernel_size=2)
        self.conv1 = nn.Sequential(stage_one)

        # Stage 2, same pattern with 16 -> 32 channels.
        stage_two = OrderedDict()
        stage_two['conv2'] = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2)
        stage_two['ReLU2'] = nn.ReLU()
        stage_two['pool2'] = nn.MaxPool2d(kernel_size=2)
        self.conv2 = nn.Sequential(stage_two)

        # Classifier head over the flattened feature map.
        self.linear = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        """Run both conv stages, flatten per sample, and return the linear layer's output."""
        features = self.conv2(self.conv1(x))
        batch_size = features.size(0)
        flattened = features.view(batch_size, -1)  # one row of features per sample
        return self.linear(flattened)


net = Net()
print(net)

執行結果:

從上面的結果中可以看出,這個時候每一個層都有了自己的名稱。但是此時需要注意,我們並不能夠用方括號加名稱的方式獲取層,方括號依然只能使用索引index,即net.conv1[1] 是正確的,net.conv1['ReLU1']是錯誤的。這是因為torch.nn.Sequential()的下標存取只支援整數索引(與切片)。如果想通過名稱獲取層,可以改用屬性存取,例如net.conv1.ReLU1。

方法三:torch.nn.Sequential() 搭配 add_module()

import torch.nn as nn
class Net(nn.Module):
    """Small CNN for 1x28x28 inputs whose stages are filled via ``add_module``.

    Each stage starts as an empty ``nn.Sequential`` and receives its layers
    one by one through ``add_module(name, layer)``, which registers every
    layer under an explicit name.  The architecture is two
    Conv2d -> ReLU -> MaxPool2d stages followed by one linear layer.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: layers are appended in the order listed here.
        self.conv1 = nn.Sequential()
        stage_one_layers = (
            ('conv1', nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=2)),
            ('ReLU1', nn.ReLU()),
            ('pool1', nn.MaxPool2d(kernel_size=2)),
        )
        for layer_name, layer in stage_one_layers:
            self.conv1.add_module(layer_name, layer)

        # Stage 2: same pattern with 16 -> 32 channels.
        self.conv2 = nn.Sequential()
        stage_two_layers = (
            ('conv2', nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2)),
            ('ReLU2', nn.ReLU()),
            ('pool2', nn.MaxPool2d(kernel_size=2)),
        )
        for layer_name, layer in stage_two_layers:
            self.conv2.add_module(layer_name, layer)

        # Classifier head over the flattened feature map.
        self.linear = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        """Run both conv stages, flatten per sample, and return the linear layer's output."""
        features = self.conv2(self.conv1(x))
        batch_size = features.size(0)
        flattened = features.view(batch_size, -1)  # one row of features per sample
        return self.linear(flattened)


net = Net()
print(net)

執行結果:

方法四:直接定義各層,並在 forward() 中使用 torch.nn.functional 的函式

import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    """Small CNN for 1x28x28 inputs that applies ReLU and pooling functionally.

    Only the layers that own parameters (two convolutions and one linear
    layer) are registered as sub-modules; the activation and pooling steps
    are applied in ``forward`` through ``torch.nn.functional``.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, 5, 1, 2)    # (1, 28, 28)  -> (16, 28, 28)
        self.conv2 = nn.Conv2d(16, 32, 5, 1, 2)   # (16, 14, 14) -> (32, 14, 14)
        self.linear = nn.Linear(32*7*7, 10)

    def forward(self, x):
        """Return the linear layer's output, one row of 10 values per input sample."""
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)   # -> (16, 14, 14)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)   # -> (32, 7, 7)
        # Flatten each sample to 32*7*7 features; without this the linear
        # layer receives a 4-D tensor and fails with a shape mismatch.
        x = x.view(x.size(0), -1)
        output = self.linear(x)
        return output

net = Net()
print(net)

執行結果:

2. 構建loss函式,選擇優化方法

# Optimizer, learning-rate schedule and loss function for training.
# `net` (the model) and `lr` (the initial learning rate) must already be defined.
import torch.optim as optim

optimizer = optim.Adam(net.parameters(), lr=lr)
# Multiplies the learning rate by 0.5 once the scheduler's epoch count reaches
# 75 and again at 150; this only takes effect if scheduler.step() is called
# once per epoch.
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[75, 150], gamma=0.5)
# Plain module-level name (no `self.`): the training and test snippets call
# `criterion(output, target)` directly.
criterion = nn.CrossEntropyLoss()

3. 匯入資料(以mnist資料為例)

# Load MNIST and arrange it as image batches of shape (N, 1, 28, 28).
# `input_data` (the MNIST reader) must already be imported by the caller.
import numpy as np

mnist = input_data.read_data_sets("data/MNIST_data/", one_hot=False)

# Shapes below are taken from the original author's notes.
X_train, y_train = mnist.train.images, mnist.train.labels  # (55000, 784), (55000,)
X_test, y_test = mnist.test.images, mnist.test.labels  # (10000, 784), (10000,)
X_valid, y_valid = mnist.validation.images, mnist.validation.labels  # (5000, 784), (5000,)

# Fold the validation split into the training set, then reshape the flat
# 784-value rows into single-channel 28x28 images.
train_data_images = np.reshape(
    np.concatenate((X_train, X_valid), axis=0), (-1, 1, 28, 28)
)  # (60000, 1, 28, 28)
train_data_labels = np.concatenate((y_train, y_valid), axis=0)  # (60000,)
test_data_images = np.reshape(X_test, (-1, 1, 28, 28))  # (10000, 1, 28, 28)
test_data_labels = y_test  # (10000,)

4.訓練

# Train `net` for `epochs` passes over the training set.
# Relies on names defined by the surrounding snippets: net, optimizer,
# criterion, iterate_minibatches, train_data_images, train_data_labels,
# train_batch_size and epochs.
net.train()  # make sure the model is in training mode
for epoch in range(epochs):
    # Per-epoch accumulators; resetting them here keeps the metrics below
    # limited to a single epoch.
    train_loss = 0.0
    train_correct = 0
    total = 0

    # iterate_minibatches() splits the training arrays into batches;
    # shuffle=True visits the samples in random order.
    for data, target in iterate_minibatches(train_data_images, train_data_labels, train_batch_size, shuffle=True):
        # The batches come from NumPy arrays; the model needs float tensors
        # and CrossEntropyLoss needs integer (long) class indices.
        data = torch.as_tensor(data, dtype=torch.float32)
        target = torch.as_tensor(target, dtype=torch.long)

        optimizer.zero_grad()  # clear gradients left from the previous step
        output = net(data)
        loss = criterion(output, target)  # batch loss
        loss.backward()
        optimizer.step()
        train_loss += loss.item()  # running sum of batch losses for this epoch

        # torch.max(output, 1) returns (row maxima, indices of the maxima);
        # the indices are the predicted classes.
        pred = torch.max(output, 1)
        train_correct += (pred[1] == target).sum().item()  # correct predictions this epoch
        total += target.size(0)

    # NOTE(review): if a learning-rate scheduler is in use, call its .step()
    # here, once per epoch.
    Train_Accuracy = train_correct / total if total else 0.0
    Train_Loss = train_loss


def iterate_minibatches(inputs, targets, batch_size, shuffle=True):
    """Yield ``(inputs_batch, targets_batch)`` pairs that cover every sample once.

    Define this before the training and test loops that call it.

    Args:
        inputs: Indexable samples.  With ``shuffle=True`` they are indexed by
            an integer index array, so a NumPy array (or similar) is needed.
        targets: Labels aligned with ``inputs``; must have the same length.
        batch_size: Positive number of samples per batch.  The final batch is
            smaller when the sample count is not a multiple of it.
        shuffle: When True, samples are visited in a random order drawn from
            NumPy's global RNG; when False, in their original order.

    Yields:
        Tuples ``(inputs[excerpt], targets[excerpt])``.

    Raises:
        ValueError: If the lengths differ or ``batch_size`` is not positive.
    """
    import numpy as np  # local import so the function is usable on its own

    if len(inputs) != len(targets):
        raise ValueError("inputs and targets must have the same length")
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    num_samples = len(inputs)
    if shuffle:
        indices = np.arange(num_samples)
        np.random.shuffle(indices)

    # Stepping over the full range also emits the trailing partial batch,
    # including the case where there are fewer samples than batch_size.
    for start_idx in range(0, num_samples, batch_size):
        stop_idx = min(start_idx + batch_size, num_samples)
        if shuffle:
            excerpt = indices[start_idx:stop_idx]
        else:
            excerpt = slice(start_idx, stop_idx)
        yield inputs[excerpt], targets[excerpt]

5. 測試

# Evaluate `net` on the held-out test set.
# Relies on names defined by the surrounding snippets: net, criterion,
# iterate_minibatches, test_data_images, test_data_labels and
# test_batch_size (the test-time counterpart of train_batch_size).
net.eval()  # inference mode for layers that behave differently in training

# Fresh accumulators: `total` in particular must not carry over the sample
# count from training, or the accuracy below would be wrong.
test_loss = 0.0
test_correct = 0
total = 0

with torch.no_grad():  # no gradients are needed for evaluation
    for data, target in iterate_minibatches(test_data_images, test_data_labels, test_batch_size, shuffle=False):
        # The batches come from NumPy arrays; the model needs float tensors
        # and CrossEntropyLoss needs integer (long) class indices.
        data = torch.as_tensor(data, dtype=torch.float32)
        target = torch.as_tensor(target, dtype=torch.long)

        output = net(data)

        loss = criterion(output, target)
        test_loss += loss.item()  # running sum of batch losses

        # torch.max(output, 1) returns (row maxima, indices of the maxima);
        # the indices are the predicted classes.
        pred = torch.max(output, 1)
        test_correct += (pred[1] == target).sum().item()
        total += target.size(0)


Test_Accuracy = test_correct / total if total else 0.0
Test_Loss = test_loss

6. 儲存模型

# Write the trained model to disk and report the destination.
# The whole `net` object is serialized here (not just its state_dict), so
# loading the file later needs the Net class definition to be available.
model_out_path = "model.pth"
torch.save(obj=net, f=model_out_path)
print("Checkpoint saved to {}".format(model_out_path))