使用訓練好的caffe模型分類圖片(python版)
阿新 • • 發佈:2018-11-10
英文官方文件:http://nbviewer.jupyter.org/github/BVLC/caffe/blob/master/examples/00-classification.ipynb
- 匯入python caffe包
import numpy as np import matplotlib.pyplot as plt # display plots in this notebook %matplotlib inline # set display defaults plt.rcParams['figure.figsize'] = (10, 10) # large images plt.rcParams['image.interpolation'] = 'nearest' # don't interpolate: show square pixels plt.rcParams['image.cmap'] = 'gray' # use grayscale output rather than a (potentially misleading) color heatmap import sys import os caffe_root = './' #指定caffe的根目錄 sys.path.insert(0, caffe_root + 'python') #將caffe python介面檔案路徑新增到python path中 import caffe # 判斷model檔案是否存在 if os.path.isfile(caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'): print 'CaffeNet found.' else: print 'Downloading pre-trained CaffeNet model...'
- 載入網路,建立輸入處理
使用python的caffe.io.load_image介面讀取圖片,返回的是值域為[0, 1]的np.float32陣列
def load_image(filename, color=True):
    """
    Read an image file into a float32 array with a normalised channel layout.

    Parameters
    ----------
    filename : string
        Path of the image to read.
    color : boolean
        Flag for the color format. True (default) loads as RGB, while False
        loads as intensity (if the image is already grayscale).

    Returns
    -------
    image : an image with type np.float32 in range [0, 1]
        of size (H x W x 3) in RGB or
        of size (H x W x 1) in grayscale.
    """
    pixels = skimage.io.imread(filename, as_grey=not color)
    img = skimage.img_as_float(pixels).astype(np.float32)

    if img.ndim == 2:
        # Two-dimensional result: append a channel axis, and replicate that
        # single channel three times when a color image was requested.
        img = img[:, :, np.newaxis]
        return np.tile(img, (1, 1, 3)) if color else img

    if img.shape[2] == 4:
        # Four channels: keep only the first three.
        return img[:, :, :3]

    return img
python Transformer介面會對load_image讀取的圖片做處理。注意raw_scale是在減去均值之前(resize、維度變換、通道變換之後)進行,而input_scale是在減去均值之後進行
def preprocess(self, in_, data):
    """
    Format input for Caffe:
    - convert to single precision (float32)
    - resize to the input dimensions (preserving the number of channels)
    - transpose dimensions to K x H x W
    - reorder channels (for instance color to BGR)
    - scale raw input (e.g. from [0, 1] to [0, 255] for ImageNet models)
    - subtract mean
    - scale feature

    The caller's `data` array is never modified: if the working array may
    still share memory with `data` when the in-place scaling steps are
    reached, it is copied first.

    Parameters
    ----------
    in_ : name of input blob to preprocess for
    data : (H' x W' x K) ndarray

    Returns
    -------
    caffe_in : (K x H x W) float32 ndarray for input to a Net
    """
    self.__check_input(in_)
    # copy=False avoids a copy when `data` is already float32, which means
    # caffe_in can alias `data` from here on.
    caffe_in = data.astype(np.float32, copy=False)
    transpose = self.transpose.get(in_)
    channel_swap = self.channel_swap.get(in_)
    raw_scale = self.raw_scale.get(in_)
    mean = self.mean.get(in_)
    input_scale = self.input_scale.get(in_)
    in_dims = self.inputs[in_][2:]

    # 1. Resize to the spatial size of the input blob.
    if caffe_in.shape[:2] != in_dims:
        caffe_in = resize_image(caffe_in, in_dims)
    # 2. Reorder axes, e.g. H x W x C -> C x H x W.
    if transpose is not None:
        caffe_in = caffe_in.transpose(transpose)
    # 3. Reorder channels, e.g. RGB -> BGR.
    if channel_swap is not None:
        caffe_in = caffe_in[channel_swap, :, :]

    # The remaining steps operate in place (which keeps the result float32
    # whatever the dtype of the scale/mean). Detach from the caller's array
    # first if it could otherwise be overwritten.
    modifies_in_place = not (raw_scale is None
                             and mean is None
                             and input_scale is None)
    if modifies_in_place and np.may_share_memory(caffe_in, data):
        caffe_in = caffe_in.copy()

    # 4. Raw scaling, e.g. raw_scale = 255 maps values in [0, 1] to [0, 255].
    if raw_scale is not None:
        caffe_in *= raw_scale
    # 5. Mean subtraction.
    if mean is not None:
        caffe_in -= mean
    # 6. Feature scaling, e.g. input_scale = 0.00390625 (1/256) maps
    #    [0, 255] data back to roughly [0, 1].
    if input_scale is not None:
        caffe_in *= input_scale
    return caffe_in
# Run Caffe in CPU mode.
caffe.set_mode_cpu()
model_def = caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt'
model_weights = caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'
# Load the network.
net = caffe.Net(model_def, # model definition file
model_weights, # trained model weights file
caffe.TEST) # use test mode (e.g., don't perform dropout)
# Load the mean file. Per the original author's note, mu has shape (3, 256, 256):
# the first mean(1) averages over axis 1 and yields shape (3, 256), and the second
# mean(1) reduces that to shape (3,), i.e. one mean value per channel
# (labelled B, G, R by the print below).
mu = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
mu = mu.mean(1).mean(1)
print 'mean-subtracted values:', zip('BGR', mu)
# Output: mean-subtracted values: [('B', 104.0069879317889), ('G', 116.66876761696767), ('R', 122.6789143406786)]
# Create a transformer for the input called 'data',
# sized from the shape of the net's 'data' blob.
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
# Move the channel axis to the outermost position: (H, W, C) becomes (C, H, W).
transformer.set_transpose('data', (2,0,1))
transformer.set_mean('data', mu) # subtract the mean value of each channel
transformer.set_raw_scale('data', 255) # rescale from [0, 1] to [0, 255]
transformer.set_channel_swap('data', (2,1,0)) # reverse the channel order, from RGB to BGR
- 使用CPU分類
# To demonstrate batching, set the input batch size to 50.
net.blobs['data'].reshape(50, # batch size
3, # 3 channels
227, 227) # image size is 227x227
# Per the original author's note: caffe.io.load_image returns pixel values in
# [0, 1], whereas cv2.imread returns values in [0, 255].
image = caffe.io.load_image(caffe_root + 'examples/images/cat.jpg')
# Preprocess the image with the transformer; this includes rescaling values to [0, 255].
transformed_image = transformer.preprocess('data', image)
plt.imshow(image)
# Copy the image data into the memory allocated for the net's 'data' blob.
net.blobs['data'].data[...] = transformed_image
### Forward pass: perform the classification.
output = net.forward()
# There may be several top blobs; select 'prob'. The trailing 0 picks the
# output for the first image in the batch.
output_prob = output['prob'][0]
# Print the index of the predicted class.
print 'predicted class is:', output_prob.argmax()
# Output: predicted class is: 281
- 驗證分類是否正確
# 載入imageNet的label檔案
labels_file = caffe_root + 'data/ilsvrc12/synset_words.txt'
if not os.path.exists(labels_file):
!../data/ilsvrc12/get_ilsvrc_aux.sh
labels = np.loadtxt(labels_file, str, delimiter='\t')
print 'output label:', labels[output_prob.argmax()]
# 輸出內容 output label: n02123045 tabby, tabby cat
# sort預設升序排列,反轉後全最大前五個
top_inds = output_prob.argsort()[::-1][:5] # reverse sort and take five largest items
print 'probabilities and labels:'
zip(output_prob[top_inds], labels[top_inds])
'''[(0.31243637, 'n02123045 tabby, tabby cat'),
(0.2379719, 'n02123159 tiger cat'),
(0.12387239, 'n02124075 Egyptian cat'),
(0.10075711, 'n02119022 red fox, Vulpes vulpes'),
(0.070957087, 'n02127052 lynx, catamount')]
'''
- 使用GPU模式
# Time a forward pass in CPU mode.
%timeit net.forward()
# 1 loop, best of 3: 1.42 s per loop
# Switch to GPU mode; when several GPUs are present, select one by its device id.
caffe.set_device(0) # if we have multiple GPUs, pick the first one
caffe.set_mode_gpu()
net.forward() # run once before timing to set up memory
%timeit net.forward()
# 10 loops, best of 3: 70.2 ms per loop