MNIST Handwritten Digit Recognition with a Linear Model and a CNN
阿新 • Published 2020-09-07
Implementation with a CNN
#1
import torch
import torch.nn as nn
import torch.nn.functional as F
# used to build the DataLoader
import torch.utils.data as Data
import torch.optim as optim
# the MNIST dataset lives in torchvision
import torchvision

#2
# The training set has 60000 images and we use mini-batch gradient descent:
# batch_size = 64 means 64 images (each 1*28*28) are fed to the network at a time.
batch_size = 64
transform = torchvision.transforms.Compose([
    # convert the MNIST data to tensors
    torchvision.transforms.ToTensor(),
    # normalize with the dataset mean and standard deviation
    torchvision.transforms.Normalize((0.1307, ), (0.3081, ))
])
train_data = torchvision.datasets.MNIST('./dataset/mnist/', train=True, download=True, transform=transform)
train_loader = Data.DataLoader(train_data, shuffle=True, batch_size=batch_size)
test_data = torchvision.datasets.MNIST('./dataset/mnist/', train=False, download=True, transform=transform)
test_loader = Data.DataLoader(test_data, shuffle=False, batch_size=batch_size)

#3
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # arguments: input channels (1, grayscale), output channels (10 feature maps, i.e. 10 kernels), kernel size
        self.conv1 = nn.Conv2d(1, 10, 5)
        self.conv2 = nn.Conv2d(10, 20, 5)
        # max pooling with a 2*2 window (take the max of each 2*2 block)
        self.pooling = nn.MaxPool2d(2)
        # fully connected layer
        self.fc = nn.Linear(320, 10)

    def forward(self, x):
        # This step matters: batch_size is 64 but the training set has 60000 images,
        # and 60000 % 64 = 32, so the last batch holds only 32 images.
        batch_size = x.size(0)
        # input: batch_size*1*28*28; after conv1: batch_size*10*24*24 (10 kernels, 28-5+1=24); after max pooling: batch_size*10*12*12
        x = F.relu(self.pooling(self.conv1(x)))
        # input: batch_size*10*12*12; after conv2: batch_size*20*8*8; after max pooling: batch_size*20*4*4
        x = F.relu(self.pooling(self.conv2(x)))
        # input: batch_size*20*4*4; after reshaping with view: batch_size*320 (hence Linear(320, 10))
        x = x.view(batch_size, -1)
        x = self.fc(x)
        return x

model = CNN()

#4
# use cross entropy as the loss function
criterion = nn.CrossEntropyLoss()
# mini-batch stochastic gradient descent
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

#5
def train(epoch):
    running_loss = 0
    for batch_idx, data in enumerate(train_loader):
        inputs, target = data
        # zero the gradients
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()
        # take loss.item(), a plain scalar, so we don't keep extending the computation graph
        running_loss += loss.item()
        if batch_idx % 300 == 299:
            print("[%d,%5d] loss: %.3f" % (epoch+1, batch_idx+1, running_loss/300))
            running_loss = 0

#6
def test():
    correct = 0
    total = 0
    # Disabling gradient computation is useful for inference when you are sure you will not
    # call Tensor.backward(): it reduces the memory consumption of computations that would
    # otherwise have requires_grad=True, and in this mode every result has requires_grad=False
    # even if the inputs have requires_grad=True. (from the PyTorch docs)
    with torch.no_grad():
        for data in test_loader:
            inputs, labels = data
            outputs = model(inputs)
            # each image gets 10 scores, one per digit; take the largest,
            # i.e. the digit the network considers most likely
            _, prediction = torch.max(outputs.data, dim=1)
            total += labels.size(0)
            correct += (prediction == labels).sum().item()
    print("ACC: %d %%" % (100*correct/total))

#7
if __name__ == '__main__':
    # number of epochs
    for epoch in range(5):
        train(epoch)
        test()
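As a quick sanity check of the shape arithmetic in the comments above, you can push a dummy batch through the two conv/pool stages and print the shapes (ReLU does not change them, so it is omitted here). This is a minimal sketch assuming the CNN class defined above is in scope:

# hypothetical sanity check for the layer shapes
check = CNN()
x = torch.randn(64, 1, 28, 28)       # a fake batch of 64 grayscale 28*28 images
x = check.pooling(check.conv1(x))
print(x.shape)                       # torch.Size([64, 10, 12, 12])
x = check.pooling(check.conv2(x))
print(x.shape)                       # torch.Size([64, 20, 4, 4]); 20*4*4 = 320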
Run results
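An aside on the transform used in both scripts: the constants in Normalize((0.1307, ), (0.3081, )) are the mean and standard deviation of the MNIST training pixels. A minimal sketch to reproduce them (it stacks all 60000 images into one tensor in memory, which is fine for a dataset this small):

import torch
import torchvision

# load the raw training set; ToTensor scales pixels into [0, 1]
raw = torchvision.datasets.MNIST('./dataset/mnist/', train=True, download=True,
                                 transform=torchvision.transforms.ToTensor())
pixels = torch.stack([img for img, _ in raw])      # shape: 60000*1*28*28
print(pixels.mean().item(), pixels.std().item())   # roughly 0.1307 and 0.3081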
用全連線的Linear模型實現
#1
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as Data
import torch.optim as optim
import torchvision

#2
batch_size = 64
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.1307, ), (0.3081, ))
])
train_data = torchvision.datasets.MNIST('./dataset/mnist/', train=True, download=True, transform=transform)
train_loader = Data.DataLoader(train_data, shuffle=True, batch_size=batch_size)
test_data = torchvision.datasets.MNIST('./dataset/mnist/', train=False, download=True, transform=transform)
test_loader = Data.DataLoader(test_data, shuffle=False, batch_size=batch_size)

#3 (a fully connected network)
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.l1 = nn.Linear(784, 512)
        self.l2 = nn.Linear(512, 256)
        self.l3 = nn.Linear(256, 128)
        self.l4 = nn.Linear(128, 64)
        self.l5 = nn.Linear(64, 10)

    def forward(self, x):
        x = x.view(-1, 784)
        x = F.relu(self.l1(x))
        x = F.relu(self.l2(x))
        x = F.relu(self.l3(x))
        x = F.relu(self.l4(x))
        # note: no activation on the last layer, because the output feeds into
        # the softmax inside CrossEntropyLoss
        return self.l5(x)

model = Net()

#4
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

#5
def train(epoch):
    running_loss = 0
    for batch_idx, data in enumerate(train_loader):
        inputs, target = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if batch_idx % 300 == 299:
            print("[%d,%5d] loss: %.3f" % (epoch+1, batch_idx+1, running_loss/300))
            running_loss = 0

#6
def test():
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            inputs, labels = data
            outputs = model(inputs)
            _, prediction = torch.max(outputs.data, dim=1)
            total += labels.size(0)
            correct += (prediction == labels).sum().item()
    print("ACC: %d %%" % (100*correct/total))

#7
if __name__ == '__main__':
    for epoch in range(5):
        train(epoch)
        test()
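One way to make the comparison between the two models concrete is to count their trainable parameters. A small sketch, assuming the CNN and Net classes from the two scripts above are placed in one file:

# hypothetical comparison of model sizes
def count_params(m):
    # sum the element counts of all trainable tensors
    return sum(p.numel() for p in m.parameters())

print("Linear Net:", count_params(Net()))   # 575050 parameters
print("CNN:", count_params(CNN()))          # 8490 parameters

The fully connected model needs roughly 68 times as many parameters as the small CNN, yet the CNN typically reaches higher test accuracy on MNIST, because convolution and pooling exploit the 2D structure of the images instead of flattening it away at the input.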