Faster R-CNN test code explained
阿新 • Posted 2019-01-22
# (Imports as in the repo's surrounding test.py: os, cv2, numpy as np, cPickle,
# matplotlib.pyplot as plt, plus Timer, cfg, nms, im_detect, get_output_dir and
# vis_detections from the lib/ modules. This is Python 2 code, hence xrange and
# the print statements.)
def test_net(sess, net, imdb, weights_filename, max_per_image=300, thresh=0.05, vis=False):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #     all_boxes[cls][image] = N x 5 array of detections in
    #     (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
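    # Example: with imdb.num_classes == 3 and num_images == 2, all_boxes is a
    # 3 x 2 nested list; all_boxes[j][i] will later hold an (N, 5) float32
    # array of the N surviving detections for class j in image i.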
    output_dir = get_output_dir(imdb, weights_filename)
    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    if not cfg.TEST.HAS_RPN:
        roidb = imdb.roidb

    det_file = os.path.join(output_dir, 'detections.pkl')
    # if os.path.exists(det_file):
    #     with open(det_file, 'rb') as f:
    #         all_boxes = cPickle.load(f)
    # First loop over every image, feed it to the detection function, and get
    # back the detections and scores for that image's proposal regions
    for i in xrange(num_images):
        # filter out any ground truth boxes
        if cfg.TEST.HAS_RPN:
            box_proposals = None
        else:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select only the rois
            # that have the gt_classes field set to 0, which means there's no
            # ground truth.
            box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]
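            # Example: gt_classes == [15, 0, 0, 7] keeps only rows 1 and 2,
            # i.e. the proposals that are not ground-truth boxes.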
        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        # scores (ndarray): R x K array of object class scores
        #                   (K includes background as object category 0)
        # boxes (ndarray): R x (4*K) array of predicted bounding boxes
        # the inputs used to be lists; what comes back are ndarrays
        scores, boxes = im_detect(sess, net, im, box_proposals)  # detections and scores for this image
        detect_time = _t['im_detect'].toc(average=False)
        _t['misc'].tic()
        if vis:
            image = im[:, :, (2, 1, 0)]  # BGR -> RGB for matplotlib
            plt.cla()
            plt.imshow(image)
        # skip j = 0, because it's the background class
        for j in xrange(1, imdb.num_classes):
            # an image has R proposal regions and there are K classes in all;
            # everything below handles class j only
            inds = np.where(scores[:, j] > thresh)[0]  # indices of detections whose score beats the threshold
            cls_scores = scores[inds, j]               # pull out the corresponding scores
            cls_boxes = boxes[inds, j*4:(j+1)*4]       # pull out the corresponding boxes
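            # boxes packs one 4-coordinate refinement per class into each row,
            # so columns [4*j, 4*j + 4) are (x1, y1, x2, y2) for class j.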
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)  # merge the boxes and scores into a single new matrix
            keep = nms(cls_dets, cfg.TEST.NMS)   # NMS keeps the best boxes, i.e. the objects present in the image
            cls_dets = cls_dets[keep, :]
            if vis:
                vis_detections(image, imdb.classes[j], cls_dets)
            all_boxes[j][i] = cls_dets
        if vis:
            plt.show()
        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in xrange(1, imdb.num_classes)])  # concatenate the scores of every detection in this image
            if len(image_scores) > max_per_image:  # if too many regions survive NMS, keep only the top max_per_image (usually only one or two remain anyway)
                image_thresh = np.sort(image_scores)[-max_per_image]
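                # e.g. image_scores = [0.9, 0.8, 0.3] with max_per_image = 2
                # gives image_thresh = 0.8, so only the top two detections survive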
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        nms_time = _t['misc'].toc(average=False)

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, detect_time, nms_time)
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir)
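The nms call above dispatches to the repo's compiled implementation (Cython/CUDA under lib/nms). As a rough sketch of what it computes, here is a minimal pure-NumPy version of greedy IoU suppression; the name nms_sketch and the standalone form are illustrative, not the repo's actual code.

import numpy as np

def nms_sketch(dets, thresh):
    """Greedy NMS: repeatedly keep the highest-scoring box and drop every
    remaining box whose IoU with it exceeds thresh."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # indices sorted by descending score
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the winning box with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # keep only the boxes that overlap the winner by no more than thresh
        order = order[np.where(iou <= thresh)[0] + 1]
    return keep

cfg.TEST.NMS is the IoU threshold (typically 0.3 in this codebase's default config), so keep holds the row indices of cls_dets that survive suppression.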
Illustrated explanation
all_boxes = [[[] for _ in xrange(num_images)]
             for _ in xrange(imdb.num_classes)]

for j in xrange(1, imdb.num_classes):
    # an image has R proposal regions and there are K classes in all;
    # everything below handles class j only
    inds = np.where(scores[:, j] > thresh)[0]  # indices of detections whose score beats the threshold
    cls_scores = scores[inds, j]               # pull out the corresponding scores
    cls_boxes = boxes[inds, j*4:(j+1)*4]       # pull out the corresponding boxes
    cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
        .astype(np.float32, copy=False)        # merge the boxes and scores into a single new matrix
    keep = nms(cls_dets, cfg.TEST.NMS)         # NMS keeps the best boxes, i.e. the objects present in the image
    cls_dets = cls_dets[keep, :]
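To make the indexing concrete, here is the same flow on a made-up image with R = 3 proposals and K = 3 classes; all values below are invented for illustration.

import numpy as np

# hypothetical im_detect output: R = 3 proposals, K = 3 classes (0 = background)
scores = np.array([[0.70, 0.20, 0.10],
                   [0.05, 0.90, 0.05],
                   [0.10, 0.60, 0.30]])
boxes = np.arange(3 * 12, dtype=np.float32).reshape(3, 12)  # R x (4*K)

thresh = 0.5
j = 1                                       # look at class 1 only
inds = np.where(scores[:, j] > thresh)[0]   # -> array([1, 2])
cls_scores = scores[inds, j]                # -> array([0.9, 0.6])
cls_boxes = boxes[inds, j*4:(j+1)*4]        # columns 4..7 of rows 1 and 2
cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis]))
print(cls_dets.shape)                       # (2, 5): two class-1 candidates

nms(cls_dets, cfg.TEST.NMS) would then drop whichever of the two boxes overlaps the higher-scoring one too heavily, and the survivors land in all_boxes[1][i].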