Handwritten Digit Recognition on the MNIST Dataset with a Linear Model and a CNN

CNN implementation

#1
import torch 
import torch.nn as nn
import torch.nn.functional as F 
# used to build the DataLoader
import torch.utils.data as Data 
import torch.optim as optim
# the MNIST dataset lives inside torchvision
import torchvision

#2
# the training set has 60,000 images; with mini-batch gradient descent, batch_size = 64 means 64 images (each 1*28*28) go through the network at a time
batch_size = 64
transform = torchvision.transforms.Compose([
    # convert the MNIST images to tensors
    torchvision.transforms.ToTensor(),
    # normalize to zero mean / unit variance using the MNIST mean and std
    torchvision.transforms.Normalize((0.1307, ), (0.3081, ))
])
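
The constants 0.1307 and 0.3081 are the mean and standard deviation of the MNIST training pixels. As a rough check, they can be recomputed from the raw data; the sketch below assumes the same dataset path used later in this post.

# Sketch: estimate the normalization constants from the raw training set
raw_train = torchvision.datasets.MNIST('./dataset/mnist/', train=True, download=True,
                                       transform=torchvision.transforms.ToTensor())
pixels = torch.stack([img for img, _ in raw_train])   # 60000*1*28*28
print(pixels.mean().item(), pixels.std().item())      # roughly 0.1307, 0.3081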

train_data = torchvision.datasets.MNIST('./dataset/mnist/', train=True, download=True, transform=transform)
train_loader = Data.DataLoader(train_data, shuffle=True, batch_size=batch_size)
test_data = torchvision.datasets.MNIST('./dataset/mnist/', train=False, download=True, transform=transform)
test_loader = Data.DataLoader(test_data, shuffle=False, batch_size=batch_size)
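
A quick sanity check of what the DataLoader yields (the printed shapes are what they should be given batch_size = 64):

# Sketch: peek at one batch to verify the input shapes
images, labels = next(iter(train_loader))
print(images.shape)   # torch.Size([64, 1, 28, 28])
print(labels.shape)   # torch.Size([64])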

#3
class CNN(nn.Module):
    def __init__(self):
        super(CNN,self).__init__()
        # arguments: input channels (1, a grayscale image), output channels (10 feature maps, i.e. 10 kernels), kernel size
        self.conv1 = nn.Conv2d(1,10,5)
        self.conv2 = nn.Conv2d(10,20,5)
        # max pooling with window size 2 (take the largest value in each 2*2 block)
        self.pooling = nn.MaxPool2d(2)
        # fully connected layer
        self.fc = nn.Linear(320,10)
        
    def forward(self,x):
        # important: read the batch size from the input; the training set has 60,000 images and 60000 % 64 = 32, so the last batch holds only 32 images
        batch_size = x.size(0)
        # input: batch_size*1*28*28; after conv1: batch_size*10*24*24 (10 kernels, 28-5+1=24); after max pooling: batch_size*10*12*12
        x = F.relu(self.pooling(self.conv1(x)))
        # input: batch_size*10*12*12; after conv2: batch_size*20*8*8; after max pooling: batch_size*20*4*4
        x = F.relu(self.pooling(self.conv2(x)))
        # input: batch_size*20*4*4; after view (reshape): batch_size*320 (which is why the layer is Linear(320, 10))
        x = x.view(batch_size,-1)
        x = self.fc(x)
        return x
model = CNN()
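
Before training, the 320-dimensional flatten size claimed in the comments can be verified with a dummy forward pass through the conv/pool stack (a sketch, reusing the layers defined above):

# Sketch: trace the shapes with a fake batch of 2 images
with torch.no_grad():
    dummy = torch.zeros(2, 1, 28, 28)
    h = F.relu(model.pooling(model.conv1(dummy)))   # -> 2*10*12*12
    h = F.relu(model.pooling(model.conv2(h)))       # -> 2*20*4*4
    print(h.view(2, -1).shape)                      # torch.Size([2, 320])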

#4
# use cross-entropy as the loss function
criterion = nn.CrossEntropyLoss()
# mini-batch stochastic gradient descent with momentum
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
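
For reference, with PyTorch's defaults (dampening 0, no Nesterov), SGD with momentum updates each parameter as v = momentum*v + grad, then p = p - lr*v. A hand-rolled sketch of one step on a made-up parameter:

# Sketch: one SGD-with-momentum step, written out by hand (illustrative only)
lr, momentum = 0.01, 0.5
p = torch.randn(3, requires_grad=True)   # hypothetical parameter
v = torch.zeros_like(p)                  # velocity buffer
loss = (p ** 2).sum()
loss.backward()
with torch.no_grad():
    v = momentum * v + p.grad
    p -= lr * v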

#5
def train(epoch):
    running_loss = 0
    for batch_idx, data in enumerate(train_loader):
        inputs, target = data
        # zero the accumulated gradients
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()
		
        # accumulate loss.item(), a plain scalar, so no computation graph is kept around
        running_loss += loss.item()
        if batch_idx % 300 == 299:
            print("[%d,%5d] loss: %.3f" % (epoch+1, batch_idx+1, running_loss/300))
            running_loss = 0
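
A small illustration of why .item() matters: it returns a plain Python float detached from autograd, so running_loss never holds a computation graph (the tensor below is made up for the demo):

# Sketch: .item() extracts a graph-free Python scalar
w = torch.tensor(2.0, requires_grad=True)
loss = (w - 1) ** 2
print(loss.item(), type(loss.item()))   # 1.0 <class 'float'>
# summing the loss tensors directly would keep every batch's graph alive in memory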

#6
def test():
    correct = 0
    total = 0
    # Disabling gradient calculation is useful for inference, when you are sure you will not call Tensor.backward().
    # It reduces memory use for computations that would otherwise have requires_grad=True; in this mode every result has requires_grad=False even if the inputs have requires_grad=True. (from the PyTorch docs)
    with torch.no_grad():
        for data in test_loader:
            inputs, labels = data
            outputs = model(inputs)
            # each image gets 10 scores, one per digit; take the index of the largest, i.e. the digit the network considers most likely
            _, prediction = torch.max(outputs, dim=1)
            total += labels.size(0)
            correct += (prediction == labels).sum().item()
    print("ACC: %d %%" % (100*correct/total))
    
#7    
if __name__ == '__main__':
    # number of training epochs
    for epoch in range(5):
        train(epoch)
        test()
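
Once the five epochs finish, a common follow-up (not part of the original script; the file name here is illustrative) is to persist the trained weights:

# Sketch: save and restore the trained weights
torch.save(model.state_dict(), 'mnist_cnn.pt')
restored = CNN()
restored.load_state_dict(torch.load('mnist_cnn.pt'))
restored.eval()   # switch to inference mode before predicting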

Run results

Implementation with a fully connected Linear model

#1
import torch 
import torch.nn as nn
import torch.nn.functional as F 
import torch.utils.data as Data 
import torch.optim as optim
import torchvision

#2
batch_size = 64 
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.1307, ), (0.3081, ))
])

train_data = torchvision.datasets.MNIST('./dataset/mnist/', train=True, download=True, transform=transform)
train_loader = Data.DataLoader(train_data, shuffle=True, batch_size=batch_size)
test_data = torchvision.datasets.MNIST('./dataset/mnist/', train=False, download=True, transform=transform)
test_loader = Data.DataLoader(test_data, shuffle=False, batch_size=batch_size)

#3 (a fully connected network)
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.l1 = nn.Linear(784, 512)
        self.l2 = nn.Linear(512, 256)
        self.l3 = nn.Linear(256, 128)
        self.l4 = nn.Linear(128, 64)
        self.l5 = nn.Linear(64, 10)

    def forward(self, x):
        x = x.view(-1, 784)
        x = F.relu(self.l1(x))
        x = F.relu(self.l2(x))
        x = F.relu(self.l3(x))
        x = F.relu(self.l4(x))
        # note: no activation on the last layer; CrossEntropyLoss applies softmax internally (see the check after #4 below)
        return self.l5(x)
model = Net()
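
A quick way to see how much heavier this fully connected model is than the CNN above is to count trainable parameters (the totals follow directly from the layer sizes):

# Sketch: the fully connected net has 575,050 parameters, versus 8,490 for the CNN above
fc_params = sum(p.numel() for p in model.parameters())
print(fc_params)   # 575050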

#4
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
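
This is also why the last layer returns raw logits: nn.CrossEntropyLoss applies log_softmax internally and then the negative log-likelihood loss. A quick equivalence check with random logits:

# Sketch: CrossEntropyLoss == log_softmax followed by nll_loss
logits = torch.randn(4, 10)           # fake raw outputs for 4 samples
target = torch.tensor([3, 7, 0, 1])   # fake labels
ce = nn.CrossEntropyLoss()(logits, target)
nll = F.nll_loss(F.log_softmax(logits, dim=1), target)
print(torch.allclose(ce, nll))        # True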

#5
def train(epoch):
    running_loss = 0
    for batch_idx, data in enumerate(train_loader):
        inputs, target = data
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if batch_idx % 300 == 299:
            print("[%d,%5d] loss: %.3f" % (epoch+1, batch_idx+1, running_loss/300))
            running_loss = 0

#6
def test():
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            inputs, labels = data
            outputs = model(inputs)
            _, prediction = torch.max(outputs, dim=1)
            total += labels.size(0)
            correct += (prediction == labels).sum().item()
    print("ACC: %d %%" % (100*correct/total))
    
#7    
if __name__ == '__main__':
    for epoch in range(5):
        train(epoch)
        test()