
Explanation of the Faster R-CNN test code

The test_net function below runs a trained Faster R-CNN over every image in an imdb, applies a per-class score threshold and NMS, caps the number of detections per image, and saves everything for evaluation.

# Standard imports used by this function; the remaining helpers (cfg,
# get_output_dir, Timer, im_detect, nms, vis_detections) come from the
# Faster R-CNN codebase itself.
import os
import cPickle
import cv2
import numpy as np
import matplotlib.pyplot as plt

def test_net(sess, net, imdb, weights_filename, max_per_image=300, thresh=0.05, vis=False):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, weights_filename)
    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    if not cfg.TEST.HAS_RPN:
        roidb = imdb.roidb

    det_file = os.path.join(output_dir, 'detections.pkl')
    # if os.path.exists(det_file):
    #     with open(det_file, 'rb') as f:
    #         all_boxes = cPickle.load(f)

    # Walk over every image, run the detector on it, and get back the
    # boxes and scores of its proposal regions.
    for i in xrange(num_images):
        # filter out any ground truth boxes
        if cfg.TEST.HAS_RPN:
            box_proposals = None
        else:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select those rois
            # that have the gt_classes field set to 0, which means there's no
            # ground truth.
            box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]

        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        # scores (ndarray): R x K array of object class scores
        #     (K includes background as object category 0)
        # boxes (ndarray): R x (4*K) array of predicted bounding boxes
        # The inputs are lists; the results come back as ndarrays.
        scores, boxes = im_detect(sess, net, im, box_proposals)  # objects and scores for this image
        detect_time = _t['im_detect'].toc(average=False)

        _t['misc'].tic()
        if vis:
            image = im[:, :, (2, 1, 0)]
            plt.cla()
            plt.imshow(image)

        # skip j = 0, because it's the background class
        for j in xrange(1, imdb.num_classes):
            # An image has R proposal regions and there are K classes in total;
            # everything below handles class j.
            inds = np.where(scores[:, j] > thresh)[0]  # proposals whose class-j score exceeds the threshold
            cls_scores = scores[inds, j]               # pull out the corresponding scores
            cls_boxes = boxes[inds, j*4:(j+1)*4]       # pull out the corresponding boxes
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)        # stack boxes and scores into one N x 5 matrix
            keep = nms(cls_dets, cfg.TEST.NMS)         # NMS keeps the best boxes, i.e. the class-j objects in the image
            cls_dets = cls_dets[keep, :]
            if vis:
                vis_detections(image, imdb.classes[j], cls_dets)
            all_boxes[j][i] = cls_dets
        if vis:
            plt.show()

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            # Concatenate the scores of all detections in this image
            # (roughly keep * num_classes of them).
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                # If too many proposals survive NMS, keep only the top
                # max_per_image (in practice usually only one or two remain).
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        nms_time = _t['misc'].toc(average=False)

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, detect_time, nms_time)

    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir)
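
The final per-image cap is easy to miss: scores from every class are pooled, and one global threshold keeps only the top max_per_image detections. A minimal standalone sketch of that step, with made-up boxes and scores (the values are illustrative, not from the repo):

import numpy as np

# Toy detections for one image, classes 1 and 2 (class 0 = background is skipped).
# Each entry is an N x 5 array: (x1, y1, x2, y2, score).
all_boxes_i = {
    1: np.array([[10, 10, 50, 50, 0.9],
                 [12, 14, 48, 52, 0.3]], dtype=np.float32),
    2: np.array([[60, 60, 90, 90, 0.7]], dtype=np.float32),
}
max_per_image = 2

# Pool every class's scores for this image into one flat vector ...
image_scores = np.hstack([dets[:, -1] for dets in all_boxes_i.values()])
if len(image_scores) > max_per_image:
    # ... find the score of the max_per_image-th best detection ...
    image_thresh = np.sort(image_scores)[-max_per_image]
    # ... and keep only detections at or above that global threshold.
    for j, dets in all_boxes_i.items():
        keep = np.where(dets[:, -1] >= image_thresh)[0]
        all_boxes_i[j] = dets[keep, :]

print(all_boxes_i)  # the 0.3 detection is dropped; 0.9 and 0.7 survive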

Explanation with diagrams

all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

(Figure: layout of all_boxes as a num_classes x num_images grid of lists; each cell [cls][image] eventually holds an N x 5 detection array.)
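
A minimal sketch (with made-up sizes) of how that nested list is laid out and indexed:

import numpy as np

num_classes = 3   # illustrative: background + 2 object classes
num_images = 4

# Outer index: class; inner index: image.
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]

# After testing, cell [cls][image] holds an N x 5 array (x1, y1, x2, y2, score).
all_boxes[1][0] = np.array([[10, 10, 50, 50, 0.9]], dtype=np.float32)

print(len(all_boxes))         # 3 rows, one per class
print(len(all_boxes[0]))      # 4 columns, one per image
print(all_boxes[1][0].shape)  # (1, 5): one class-1 detection in image 0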

for j in xrange(1, imdb.num_classes):
    # An image has R proposal regions and there are K classes in total;
    # everything below handles class j.
    inds = np.where(scores[:, j] > thresh)[0]  # proposals whose class-j score exceeds the threshold
    cls_scores = scores[inds, j]               # pull out the corresponding scores
    cls_boxes = boxes[inds, j*4:(j+1)*4]       # pull out the corresponding boxes
    cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
        .astype(np.float32, copy=False)        # stack boxes and scores into one N x 5 matrix
    keep = nms(cls_dets, cfg.TEST.NMS)         # NMS keeps the best boxes, i.e. the class-j objects in the image
    cls_dets = cls_dets[keep, :]

(Figure: for class j, scores[:, j] and boxes[:, j*4:(j+1)*4] are sliced out and stacked into the N x 5 cls_dets matrix that is fed to NMS.)
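
The nms call above runs the repo's compiled CPU/GPU kernel with cfg.TEST.NMS as the IoU threshold. As a sketch of what it computes, here is a minimal pure-NumPy greedy suppression (an illustration, not the repo's implementation):

import numpy as np

def py_nms(dets, thresh):
    """Greedy NMS: dets is an N x 5 array (x1, y1, x2, y2, score);
    returns the indices of the boxes to keep."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # highest score first

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of the top-scoring box with all remaining boxes.
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # Keep only boxes whose overlap with the kept box is below the threshold.
        order = order[1:][iou <= thresh]
    return keep

dets = np.array([[10, 10, 50, 50, 0.9],
                 [12, 12, 52, 52, 0.8],   # heavy overlap with the first box
                 [60, 60, 90, 90, 0.7]], dtype=np.float32)
print(py_nms(dets, thresh=0.3))  # [0, 2]: the 0.8 box is suppressed

Because the boxes are processed in descending score order, each surviving detection is the highest-scoring representative of its cluster of overlapping proposals, which is exactly why cls_dets[keep, :] can be read as "the class-j objects in the image".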