pytorch VGG11識別cifar10資料集(訓練+預測單張輸入圖片操作)
阿新 • • 發佈:2020-06-28
首先這是VGG的結構圖,VGG11則是紅色框裡的結構,共分五個block,如紅框中的VGG11第一個block就是一個conv3-64卷積層:
一,寫VGG程式碼時,首先定義一個 vgg_block(n,in,out)方法,用來構建VGG中每個block裡的卷積層和池化層:
n是這個block中卷積層的數目,in是輸入的通道數,out是輸出的通道數
有了block以後,我們還需要一個方法把形成的block疊在一起,我們定義這個方法叫vgg_stack:
def vgg_stack(num_convs, channels):
    """Chain several VGG blocks into one ``nn.Sequential``.

    Example (VGG11, 8 convolution layers in 5 blocks):
        vgg_net = vgg_stack((1, 1, 2, 2, 2),
                            ((3, 64), (64, 128), (128, 256), (256, 512), (512, 512)))

    Args:
        num_convs: number of convolution layers for each block.
        channels: one ``(in_channels, out_channels)`` pair for each block.

    Returns:
        ``nn.Sequential`` holding one ``vgg_block`` per entry, in order.

    Raises:
        ValueError: if ``num_convs`` and ``channels`` differ in length.
    """
    # zip() would silently drop the surplus entries and build a shallower
    # network than requested, so reject mismatched specifications up front.
    if len(num_convs) != len(channels):
        raise ValueError(
            "num_convs and channels must have the same length, got %d and %d"
            % (len(num_convs), len(channels))
        )
    blocks = [
        vgg_block(n, in_c, out_c)
        for n, (in_c, out_c) in zip(num_convs, channels)
    ]
    return nn.Sequential(*blocks)
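右邊的註釋
vgg_net = vgg_stack((1,1,2,2,2),((3,64),(64,128),(128,256),(256,512),(512,512)))
裡,(1,1,2,2,2)表示五個block裡各自的卷積層數目,((3,64),(64,128),(128,256),(256,512),(512,512))表示每個block的輸入、輸出通道數,如(3,64)表示這個block的輸入通道數是3,輸出通道數是64。vgg_stack方法返回的就是VGG11的卷積(特徵提取)部分,全連接層則在下面的vgg類中定義。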
接著定義一個vgg類,包含vgg_stack方法:
# VGG model class
class vgg(nn.Module):
    """VGG classifier: the stacked convolution blocks plus a small MLP head.

    The convolutional part is the module-level ``vgg_net``; the head maps
    512 flattened features to 10 output scores.
    """

    def __init__(self):
        super().__init__()
        self.feature = vgg_net
        head_layers = [
            nn.Linear(512, 100),
            nn.ReLU(True),
            nn.Linear(100, 10),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the feature extractor, flatten per sample, then classify."""
        feats = self.feature(x)
        batch_size = feats.shape[0]
        flat = feats.view(batch_size, -1)
        return self.fc(flat)
最後:
net = vgg() # instantiating the class gives the VGG network
那麼構建vgg網路完整的pytorch程式碼是:
from torch import nn


def vgg_block(num_convs, in_channels, out_channels):
    """Build one VGG block: ``num_convs`` 3x3 convolutions, then a max-pool.

    Args:
        num_convs: number of convolution layers in the block.
        in_channels: channels of the block's input.
        out_channels: channels produced by every convolution in the block.

    Returns:
        ``nn.Sequential`` of Conv2d/ReLU pairs followed by ``MaxPool2d(2, 2)``.
    """
    # The first convolution changes the channel count.
    net = [
        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
        nn.ReLU(True),
    ]
    # The remaining convolutions keep the channel count unchanged.
    for _ in range(num_convs - 1):
        net.append(nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1))
        net.append(nn.ReLU(True))
    net.append(nn.MaxPool2d(2, 2))  # pooling layer closing the block
    return nn.Sequential(*net)


# Stack several VGG blocks on top of each other.
def vgg_stack(num_convs, channels):
    """Chain VGG blocks into one ``nn.Sequential``.

    Args:
        num_convs: number of convolution layers for each block.
        channels: one ``(in_channels, out_channels)`` pair for each block.

    Raises:
        ValueError: if ``num_convs`` and ``channels`` differ in length.
    """
    # zip() would silently truncate a mismatched specification.
    if len(num_convs) != len(channels):
        raise ValueError(
            "num_convs and channels must have the same length, got %d and %d"
            % (len(num_convs), len(channels))
        )
    net = []
    for n, c in zip(num_convs, channels):
        in_c = c[0]
        out_c = c[1]
        net.append(vgg_block(n, in_c, out_c))
    return nn.Sequential(*net)


# The block specification decides the VGG variant; this one is VGG11
# (8 convolution layers spread over 5 blocks).
vgg_net = vgg_stack(
    (1, 1, 2, 2, 2),
    ((3, 64), (64, 128), (128, 256), (256, 512), (512, 512)),
)


# VGG model class
class vgg(nn.Module):
    """VGG classifier: ``vgg_net`` features followed by a two-layer head."""

    def __init__(self):
        super(vgg, self).__init__()
        self.feature = vgg_net
        self.fc = nn.Sequential(
            nn.Linear(512, 100),
            nn.ReLU(True),
            nn.Linear(100, 10),
        )

    def forward(self, x):
        x = self.feature(x)
        x = x.view(x.shape[0], -1)  # flatten everything except the batch dim
        x = self.fc(x)
        return x


# Get the VGG network.
net = vgg()
基於VGG11的cifar10訓練程式碼:
import sys

import numpy as np
import torch
from torch import nn
from torch.autograd import Variable
from torchvision.datasets import CIFAR10
import torchvision.transforms as transforms

# VGG11 layout: 8 convolution layers spread over 5 blocks.
VGG11_NUM_CONVS = (1, 1, 2, 2, 2)
VGG11_CHANNELS = ((3, 64), (64, 128), (128, 256), (256, 512), (512, 512))


def vgg_block(num_convs, in_channels, out_channels):
    """Build one VGG block: ``num_convs`` 3x3 convolutions, then a max-pool."""
    net = [
        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
        nn.ReLU(True),
    ]
    # Every convolution after the first keeps the channel count unchanged.
    for _ in range(num_convs - 1):
        net.append(nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1))
        net.append(nn.ReLU(True))
    net.append(nn.MaxPool2d(2, 2))  # pooling layer closing the block
    return nn.Sequential(*net)


def vgg_stack(num_convs, channels):
    """Chain one ``vgg_block`` per ``(count, (in, out))`` entry.

    Raises:
        ValueError: if ``num_convs`` and ``channels`` differ in length.
    """
    # zip() would silently truncate a mismatched specification.
    if len(num_convs) != len(channels):
        raise ValueError(
            "num_convs and channels must have the same length, got %d and %d"
            % (len(num_convs), len(channels))
        )
    net = []
    for n, c in zip(num_convs, channels):
        in_c = c[0]
        out_c = c[1]
        net.append(vgg_block(n, in_c, out_c))
    return nn.Sequential(*net)


# VGG model class
class vgg(nn.Module):
    """VGG11 classifier: stacked convolution blocks plus a two-layer head."""

    def __init__(self):
        super(vgg, self).__init__()
        # Built here rather than read from a global, so that vgg() also works
        # when this file is imported instead of being run as a script.
        self.feature = vgg_stack(VGG11_NUM_CONVS, VGG11_CHANNELS)
        self.fc = nn.Sequential(
            nn.Linear(512, 100),
            nn.ReLU(True),
            nn.Linear(100, 10),
        )

    def forward(self, x):
        x = self.feature(x)
        x = x.view(x.shape[0], -1)  # flatten everything except the batch dim
        x = self.fc(x)
        return x


# Now train the model and see how it does on CIFAR-10.
def data_tf(x):
    """Convert an HWC image to a normalized CHW float tensor."""
    x = np.array(x, dtype='float32') / 255
    x = (x - 0.5) / 0.5
    x = x.transpose((2, 0, 1))  # move the channel axis first, as PyTorch expects
    x = torch.from_numpy(x)
    return x


transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])


def get_acc(output, label):
    """Return the fraction of rows in ``output`` whose arg-max equals ``label``."""
    total = output.shape[0]
    _, pred_label = output.max(1)
    num_correct = (pred_label == label).sum().item()
    return num_correct / total


def train(net, train_data, valid_data, num_epochs, optimizer, criterion):
    """Train ``net`` and print the per-epoch loss and accuracy.

    Args:
        net: the model to optimize; it is moved to the GPU when one is available.
        train_data: iterable of ``(images, labels)`` training batches.
        valid_data: iterable of validation batches, or ``None`` to skip validation.
        num_epochs: number of passes over ``train_data``.
        optimizer: optimizer constructed over ``net``'s parameters.
        criterion: loss function called as ``criterion(output, label)``.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net = net.to(device)
    for epoch in range(num_epochs):
        train_loss = 0
        train_acc = 0
        net.train()
        for im, label in train_data:
            im = im.to(device)
            label = label.to(device)
            # forward
            output = net(im)
            loss = criterion(output, label)
            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_acc += get_acc(output, label)

        if valid_data is not None:
            valid_loss = 0
            valid_acc = 0
            net.eval()
            # no_grad must enclose the forward pass itself; otherwise the
            # autograd graph is still built for every validation batch.
            with torch.no_grad():
                for im, label in valid_data:
                    im = im.to(device)
                    label = label.to(device)
                    output = net(im)
                    loss = criterion(output, label)
                    valid_loss += loss.item()
                    valid_acc += get_acc(output, label)
            epoch_str = (
                "Epoch %d. Train Loss: %f,Train Acc: %f,Valid Loss: %f,Valid Acc: %f,"
                % (epoch, train_loss / len(train_data),
                   train_acc / len(train_data),
                   valid_loss / len(valid_data),
                   valid_acc / len(valid_data)))
        else:
            epoch_str = ("Epoch %d. Train Loss: %f,Train Acc: %f," %
                         (epoch, train_loss / len(train_data),
                          train_acc / len(train_data)))
        print(epoch_str)


if __name__ == '__main__':
    train_set = CIFAR10('./data', train=True, transform=transform, download=True)
    train_data = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)
    # The test set needs the same transform; without it the loader would be
    # handed PIL images instead of tensors.
    test_set = CIFAR10('./data', train=False, transform=transform, download=True)
    test_data = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False)

    # As an example we use a fairly small VGG11 structure with 8 conv layers.
    net = vgg()
    print(net.feature)

    optimizer = torch.optim.SGD(net.parameters(), lr=1e-1)
    criterion = nn.CrossEntropyLoss()  # cross-entropy loss

    train(net, train_data, test_data, 50, optimizer, criterion)
    torch.save(net, 'vgg_model.pth')
結束後,會出現一個模型檔案vgg_model.pth
二,然後網上找張圖片,把圖片縮成32x32,放到預測程式碼中,即可有預測結果出現,預測程式碼如下:
import torch
import cv2
import torch.nn.functional as F
# Important: although the name looks unused here, the pickled model refers to
# this class, so loading fails unless the model code has been imported.
from vgg2 import vgg
from torch.autograd import Variable
from torchvision import datasets, transforms
import numpy as np

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

if __name__ == '__main__':
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # map_location lets a model that was saved from the GPU load on a
    # CPU-only machine.
    # NOTE: on PyTorch >= 2.6 torch.load defaults to weights_only=True, and a
    # whole pickled module then needs weights_only=False (trusted files only).
    model = torch.load('vgg_model.pth', map_location=device)  # load the model
    model = model.to(device)
    model.eval()  # switch the model to evaluation mode

    img = cv2.imread("horse.jpg")  # read the image to classify
    if img is None:
        # cv2.imread reports a missing or unreadable file by returning None.
        raise FileNotFoundError("horse.jpg")
    # OpenCV decodes to BGR, whereas the training images were RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # The classifier head takes 512 features, which corresponds to 32x32 input.
    img = cv2.resize(img, (32, 32), interpolation=cv2.INTER_AREA)

    trans = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    img = trans(img)
    img = img.to(device)
    # The model expects a 4-D batch [batch_size, channels, height, width],
    # while a single image is 3-D [channels, height, width]; after unsqueeze
    # the shape is [1, 3, 32, 32].
    img = img.unsqueeze(0)

    with torch.no_grad():  # inference only, no gradients needed
        output = model(img)
    prob = F.softmax(output, dim=1)  # probabilities of the 10 classes
    print(prob)

    value, predicted = torch.max(output.data, 1)
    print(predicted.item())
    print(value)
    pred_class = classes[predicted.item()]
    print(pred_class)
縮成32x32的圖片:
執行結果:
以上這篇pytorch VGG11識別cifar10資料集(訓練+預測單張輸入圖片操作)就是小編分享給大家的全部內容了,希望能給大家一個參考,也希望大家多多支援我們。