MxNet 遷移學習實現深度學習分類
阿新 • • 發佈:2019-01-22
利用MxNet實現影象分類任務
這篇文章將利用MxNet以及其前端gluon 實現一個完整的影象分類任務,其中主要包括以下幾個方面:
- 影象I/O
- 搭建網路
- 進行訓練
- 驗證演算法
- 輸出結果
1. 訓練資料I/O
將處理好的訓練資料讀入,進行訓練。
訓練資料基本按照「一個類別對應一個子資料夾」的形式存放,具體可以參考MXNet的資料I/O文件。
1.1 程式的第一步,首先匯入相關的包
#import some packages
import sys
import collections
import datetime #用於計時
import gluonbook as gb #用於匯入一些功能函式
import math
import numpy as np
import mxnet as mx #mxnet
from mxnet import autograd, gluon, init, nd, image #匯入自動梯度,gluon前端,影象等模組
from mxnet.gluon import data as gdata, loss as gloss, model_zoo, nn #匯入模型相關模組
import os
import shutil #用於預處理複製檔案
import zipfile
import matplotlib.pyplot as plt #繪圖工具匯入
1.2 隨後定義精度計算函式、影象增廣函式等輔助函式
# 影象增廣和輔助函式
# 計算 Average Precision
def calculate_ap(labels, outputs):
    """Accumulate the reciprocal rank of the true class over all samples.

    Args:
        labels: iterable of label arrays (one per data slice); each element
            must expose ``asnumpy()`` and hold one class index per sample.
        outputs: iterable of score arrays aligned with ``labels``; each row
            holds the scores of every class for one sample.

    Returns:
        A tuple ``(ap, cnt)`` where ``ap`` is the sum of ``1 / rank`` of the
        true class (rank 1 is the highest score) and ``cnt`` is the number of
        samples seen. The caller divides the two to obtain the mean.

    Raises:
        ValueError: if a label is not a valid class index for its score row.
    """
    cnt = 0
    ap = 0.
    for label, output in zip(labels, outputs):
        # ``np.int`` was only an alias of the builtin ``int`` and was removed
        # in NumPy 1.24, so the builtin is used directly.
        label_ids = label.asnumpy().astype(int)
        scores = output.asnumpy()
        for lb, op in zip(label_ids, scores):
            # Class indices ordered from highest to lowest score.
            ranking = list(np.argsort(op)[::-1])
            # Position of the true class in that ranking (0-based).
            ap += 1.0 / (1 + ranking.index(int(lb)))
            cnt += 1
    return ((ap, cnt))
# 訓練集圖片增廣
def transform_train(data, label):
    """Augment and normalise one training image.

    The image is scaled to the 0..1 range, then passed through the augmenters
    built by ``image.CreateAugmenter`` (resize to 256, random crop to 224x224,
    random mirror, mean/std normalisation with the ImageNet statistics the
    pretrained model expects). Finally the channel axis is moved to the front.

    Args:
        data: image array with the channel axis last.
        label: class label of the image.

    Returns:
        ``(image, label)`` with the image laid out channel-first and the label
        converted to a scalar through an NDArray.
    """
    scaled = data.astype('float32') / 255
    augmenters = image.CreateAugmenter(
        data_shape=(3, 224, 224),
        resize=256,
        rand_crop=True,
        rand_mirror=True,
        mean=np.array([0.485, 0.456, 0.406]),
        std=np.array([0.229, 0.224, 0.225]),
    )
    for augment in augmenters:
        scaled = augment(scaled)
    # Move the last (channel) axis to the front.
    channel_first = nd.transpose(scaled, (2, 0, 1))
    scalar_label = nd.array([label]).asscalar()
    return (channel_first, scalar_label)
# 驗證集圖片增廣,沒有隨機裁剪和翻轉
def transform_val(data, label):
    """Normalise one validation image without random augmentation.

    Same pipeline as the training transform except that no random crop or
    random mirror is requested from ``image.CreateAugmenter``.

    Args:
        data: image array with the channel axis last.
        label: class label of the image.

    Returns:
        ``(image, label)`` with the image laid out channel-first
        (channel, height, width) and the label converted to a scalar through
        an NDArray.
    """
    scaled = data.astype('float32') / 255
    augmenters = image.CreateAugmenter(
        data_shape=(3, 224, 224),
        resize=256,
        mean=np.array([0.485, 0.456, 0.406]),
        std=np.array([0.229, 0.224, 0.225]),
    )
    for augment in augmenters:
        scaled = augment(scaled)
    # Move the last (channel) axis to the front.
    channel_first = nd.transpose(scaled, (2, 0, 1))
    scalar_label = nd.array([label]).asscalar()
    return (channel_first, scalar_label)
# 在驗證集上預測並評估
def validate(net, val_data, ctx):
    """Evaluate ``net`` on ``val_data`` and report accuracy, mean AP and loss.

    Args:
        net: network called directly on each data slice.
        val_data: iterable of ``(data, label)`` batches that supports ``len``.
        ctx: passed as ``ctx_list`` to ``gluon.utils.split_and_load``
            (presumably a list of contexts; confirm against the caller).

    Returns:
        ``(accuracy, mean_ap, mean_loss)``: the accuracy metric value, the
        accumulated reciprocal rank divided by the sample count, and the
        per-batch loss averaged over the number of batches.
    """
    accuracy = mx.metric.Accuracy()
    criterion = gluon.loss.SoftmaxCrossEntropyLoss()
    ap_total = 0.
    ap_samples = 0
    loss_total = 0
    for batch in val_data:
        # Split the batch along axis 0 across the given contexts.
        data = gluon.utils.split_and_load(
            batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
        label = gluon.utils.split_and_load(
            batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
        outputs = [net(X) for X in data]
        accuracy.update(label, outputs)
        # Mean loss of each slice, then averaged over the slices.
        slice_losses = [criterion(yhat, y) for yhat, y in zip(outputs, label)]
        slice_means = [sl.mean().asscalar() for sl in slice_losses]
        loss_total += sum(slice_means) / len(slice_losses)
        batch_ap, batch_cnt = calculate_ap(label, outputs)
        ap_total += batch_ap
        ap_samples += batch_cnt
    _, val_acc = accuracy.get()
    return ((val_acc, ap_total / ap_samples, loss_total / len(val_data)))
1.3 讀取訓練和驗證資料
這時候可以利用gluon的內建函式來對資料進行讀取了,只需要輸入對應資料的資料夾即可,參考MXNet I/O
# Load the image folders; each sub-folder of the given directory is one class.
train_set = gdata.vision.ImageFolderDataset('./train_dis/', flag=1)
valid_set = gdata.vision.ImageFolderDataset('./valid_dis/', flag=1)

# Sanity check: print each dataset object followed by its `synsets` list
# (the class folder names) to confirm that the expected classes were found.
print(train_set, train_set.synsets, sep='\n')
print(valid_set, valid_set.synsets, sep='\n')
<mxnet.gluon.data.vision.datasets.ImageFolderDataset object at 0x7fb3d6e06710>
['0', '1', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '2', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '3', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '4', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '5', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '6', '60', '7', '8', '9']
<mxnet.gluon.data.vision.datasets.ImageFolderDataset object at 0x7fb3d6e06668>
['0', '1', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '2', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '3', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '4', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '5', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '6', '60', '7', '8', '9']
得到輸入序列後,將影象讀入迭代器中,根據視訊記憶體設定批量的大小。
# Wrap the datasets in DataLoaders; the transform functions apply the
# augmentation / normalisation to every sample as it is read.
# Author's note on batch size: "32--2821M could be 64" (presumably the memory
# footprint observed with a batch of 32).
batch_size = 64
train_iter = gdata.DataLoader(
    dataset=train_set.transform(transform_train),
    batch_size=batch_size,
    shuffle=True,
    last_batch='keep',
    num_workers=4,
)
valid_iter = gdata.DataLoader(
    dataset=valid_set.transform(transform_val),
    batch_size=batch_size,
    shuffle=True,
    last_batch='keep',
    num_workers=4,
)
讀入後檢查迭代器中的資料,並將影象顯示出來以便目視確認。
# Inspect the training iterator: report the number of batches, fetch one
# batch, and display images from it.
# NOTE(review): the indentation of this snippet was lost, so the nesting of
# the loops and of the two `break` statements cannot be read from the text
# below - restore it before running.
print("trainiter lenght is: %d"%len(train_iter))
import matplotlib.pyplot as plt
for imgs, labels in train_iter:
print(labels) # print the class labels of the batch
print(imgs.shape) # inspect the shape of the image batch
break # only one batch is read
# show images
# Normalisation constants used by the transforms: [mean, std] per channel.
nor_parms = [[0.485, 0.456, 0.406],[0.229, 0.224, 0.225]]
#_,figs = plt.subplots(8,4,figsize=(8,4))
for i in range(8):
for j in range(4):
# Move the channel axis last so that matplotlib can draw the image.
x = nd.transpose(imgs[i*4+j,:,:,:],(1,2,0)).asnumpy()
print(x.shape,type(x)) # inspect the shape and type of one image of the batch
# NOTE(review): the disabled lines below multiply by the mean and add the std;
# undoing (x - mean) / std needs x * std + mean, i.e.
# x[:,:,c]*nor_parms[1][c]+nor_parms[0][c]. The image is shown still
# normalised, which is why imshow reports clipping.
#x[:,:,0]*nor_parms[0][0]+nor_parms[1][0]
#x[:,:,1]*nor_parms[0][1]+nor_parms[1][1]
#x[:,:,2]*nor_parms[0][2]+nor_parms[1][2]
plt.imshow(x)
plt.show()
break
trainiter lenght is: 512 #總共有512個batch,每個batch有64個訓練資料
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
[35. 0. 31. 19. 38. 33. 35. 33. 19. 25. 16. 26. 36. 52. 18. 16. 27. 23.
19. 4. 19. 38. 38. 11. 41. 36. 22. 36. 29. 57. 26. 55. 18. 55. 55. 16.
27. 26. 55. 10. 19. 21. 23. 19. 50. 56. 31. 14. 20. 19. 8. 54. 57. 8.
52. 19. 56. 57. 17. 42. 18. 0. 23. 55.]
<NDArray 64 @cpu_shared(0)>
(64, 3, 224, 224)
(224, 224, 3) <class 'numpy.ndarray'>
2.定義模型
這裡主要使用遷移學習的方式,利用預訓練模型抽取影象的基本特徵,而後只需要訓練最後的輸出層來進行分類。
# Build the transfer-learning network on top of a pretrained backbone.
def get_net(ctx, num_classes=61, hidden_units=256):
    """Return a pretrained ResNet-50 v2 with a new trainable head attached.

    The pretrained model is kept as the feature extractor and a new
    ``output_new`` block (two dense layers) is attached next to it. Callers
    run ``net.features`` followed by ``net.output_new``.

    Args:
        ctx: context on which the new head is initialised and to which all
            parameters of the network are moved.
        num_classes: number of output classes of the new head. Defaults to
            61, the value previously hard-coded for this dataset.
        hidden_units: width of the intermediate dense layer. Defaults to 256,
            the value previously hard-coded.

    Returns:
        The ResNet model with the extra ``output_new`` attribute.
    """
    resnet = model_zoo.vision.resnet50_v2(pretrained=True)
    # New head used for fine-tuning; it is the only freshly initialised part.
    resnet.output_new = nn.HybridSequential(prefix='')
    resnet.output_new.add(nn.Dense(hidden_units, activation='relu'))
    resnet.output_new.add(nn.Dense(num_classes))
    resnet.output_new.initialize(init.Xavier(), ctx=ctx)
    # Move every parameter of the network (pretrained ones included) to ctx.
    resnet.collect_params().reset_ctx(ctx)
    return resnet
定義損失函式,這裡主要使用分類任務的softmax交叉熵來作為損失。
# Classification loss shared by training and evaluation.
loss = gloss.SoftmaxCrossEntropyLoss()


def get_loss(data, net, ctx):
    """Return the mean per-batch loss of ``net`` over ``data``.

    Args:
        data: iterable of ``(X, y)`` batches that supports ``len``.
        net: network exposing ``features`` and ``output_new``.
        ctx: context to which each batch is copied before the forward pass.

    Returns:
        Sum of the per-batch mean losses divided by the number of batches.
    """
    running_total = 0.0
    for X, y in data:
        targets = y.as_in_context(ctx)
        # Features from the pretrained part, then scores from the new head.
        features = net.features(X.as_in_context(ctx))
        scores = net.output_new(features)
        batch_mean = loss(scores, targets).mean().asscalar()
        running_total += batch_mean
    return running_total / len(data)
2.1定義訓練過程
完成了以上的準備工作,讀入了資料、定義好了網路和損失,我們可以開始進行訓練了。訓練函式定義如下,輸入為網路模型、資料、訓練epochs、學習率、衰減等:
# Training loop: optimiser setup, per-epoch training, validation and logging.
def train(net, train_iter, valid_iter, num_epochs, lr, wd, ctx, lr_period, lr_decay):
    """Fine-tune ``net.output_new`` and save the training-loss curve.

    Only the parameters of ``net.output_new`` are handed to the trainer; the
    features are computed outside ``autograd.record`` so no gradient flows
    into the pretrained part.

    Args:
        net: network exposing ``features`` and ``output_new``.
        train_iter: iterable of ``(X, y)`` training batches supporting ``len``.
        valid_iter: validation batches, or ``None`` to skip validation.
        num_epochs: number of passes over ``train_iter``.
        lr: initial learning rate.
        wd: weight decay passed to the SGD optimiser.
        ctx: context on which the computation runs.
        lr_period: accepted for interface compatibility; currently unused
            because the periodic-decay condition is disabled.
        lr_decay: factor applied to the learning rate at the start of every
            epoch, the first one included.

    Side effects:
        Prints progress and writes ``./training_loss.png``.
    """
    trainer = gluon.Trainer(net.output_new.collect_params(), 'sgd',
                            {'learning_rate': lr, 'momentum': 0.9, 'wd': wd})
    plot_loss = []
    tic = datetime.datetime.now()
    print('Traing is begining, please waiting......')
    for epoch in range(num_epochs):
        train_l = 0.0      # accumulated mean loss of the epoch
        counter = 0        # batches processed in this epoch
        images_seen = 0    # images processed in this epoch
        # The "every lr_period epochs" condition is disabled, so the rate
        # decays on every epoch:
        #if epoch >0 and epoch %lr_period==0:
        trainer.set_learning_rate(trainer.learning_rate * lr_decay)
        for X, y in train_iter:
            counter += 1
            n_samples = X.shape[0]
            images_seen += n_samples
            if counter % 256 == 0:
                print('processd %d images' % images_seen)
            y = y.astype('float32').as_in_context(ctx)
            # Forward through the pretrained part without recording gradients.
            out_features = net.features(X.as_in_context(ctx))
            with autograd.record():
                # Only the new head is recorded, so only it is trained.
                outputs = net.output_new(out_features)
                l = loss(outputs, y)
            l.backward()
            # Normalise by the real batch size: the loader may yield a
            # smaller final batch, and this no longer relies on a global.
            trainer.step(n_samples)
            train_l += l.mean().asscalar()
        # Time spent on this epoch; total_seconds() also covers whole days,
        # which timedelta.seconds would silently drop.
        toc = datetime.datetime.now()
        h, remainder = divmod(int((toc - tic).total_seconds()), 3600)
        m, s = divmod(remainder, 60)
        time_s = "time %02d:%02d:%02d" % (h, m, s)
        if valid_iter is not None:
            valid_loss = get_loss(valid_iter, net, ctx)
            epoch_s = ("epoch %d, train loss is %f, valid loss is %f :D "
                       % (epoch + 1, train_l / len(train_iter), valid_loss))
        else:
            epoch_s = ("epoch %d, train loss is %f :D"
                       % (epoch + 1, train_l / len(train_iter)))
        tic = toc
        print(epoch_s + time_s + ', lr ' + str(trainer.learning_rate))
        plot_loss.append(train_l / len(train_iter))
    # NOTE(review): the original indentation was lost; the curve is drawn
    # once after the last epoch - confirm this matches the intended placement.
    plt.plot(plot_loss)
    plt.savefig("./training_loss.png")
2.2 開始訓練
# Training configuration. `lr_period` is forwarded to train(), whose
# periodic-decay condition is currently commented out.
ctx = gb.try_gpu()
num_epochs = 1000
lr = 0.01
wd = 1e-4
lr_period = 10
lr_decay = 0.99

# Build the network on the chosen context and fine-tune the new head.
net = get_net(ctx)
train(net, train_iter, valid_iter, num_epochs, lr, wd, ctx, lr_period, lr_decay)

# Persist the parameters of the new head once training has finished.
net.output_new.collect_params().save('./output_new_2_1000.params')
# Alternative noted by the author:
#net.output_new.save_params('./output_new_50.params')
Traing is begining, please waiting......
processd xxxxx images
processd xxxxx images
epoch 1, train loss is 1.234988, valid loss is 0.776764 :Dtime 00:04:10, lr 0.0099
3.測試
在訓練完成得到模型後,我們需要對資料進行測試。同樣需要讀入資料,並利用網路進行分類。
# Prepare the test data: load the test image folder and report its size.
test_set = gdata.vision.ImageFolderDataset('./test_dis/', flag=1)
print("There are %d test imgs" % (len(test_set),))
There are xxxx test imgs
定義影象讀入函式
def plot_image(img_path):
    """Read the file at ``img_path`` and return the decoded image.

    Despite its name the function does not draw anything; it only decodes
    the raw bytes with ``image.imdecode``.
    """
    with open(img_path, 'rb') as handle:
        raw_bytes = handle.read()
    return image.imdecode(raw_bytes)
接下來就是測試過程了:
# Prediction: classify every image listed in the test set and collect the
# softmax probabilities, one row per image, in `preds`.
preds = []
count_p = 0
for count_p, (img_path, _label) in enumerate(test_set.items, start=1):
    img = plot_image(img_path)
    # The validation transform needs a label; a dummy 0 is passed and dropped.
    data, _ = transform_val(img, 0)
    # Add a leading batch axis of size 1.
    data = data.expand_dims(axis=0)
    # Features from the pretrained part. The data goes to `ctx`, the context
    # the network was created on, instead of a hard-coded mx.gpu(), so the
    # loop also works when the model is not on the default GPU.
    output_features = net.features(data.as_in_context(ctx))
    # Feed the features to the new head and turn the scores into probabilities.
    output = nd.softmax(net.output_new(output_features))
    preds.extend(output.asnumpy())
    if count_p % 100 == 0:
        print("processed %d imgs" % count_p)
processed 100 imgs
可以根據需要將生成的預測結果preds儲存為json檔案:
# use the tese_set name and predict results
with open('submission.json', 'w') as f:
f.write("[")
for i in range(len(preds)):
if i==len(preds)-1:
f.write("{"+"\"image_id\": "+"\""+test_set.items[i][0].split('/')[-1]+"\""+','+"\"xxxx_class\":"+str(preds[i].argmax())+'}')
else:
f.write("{"+"\"image_id\": "+"\""+test_set.items[i][0].split('/')[-1]+"\""+','<