Mask R-CNN (Part 11): Understanding the Code of inspect_model.ipynb
阿新 · Published: 2019-02-15
1. Imports
import os
import sys
import random
import math
import re
import time
import numpy as np
import tensorflow as tf
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# Root directory of the project
ROOT_DIR = os.path.abspath("../../")

# Import Mask RCNN
sys.path.append(ROOT_DIR)
from mrcnn import utils
from mrcnn import visualize
from mrcnn.visualize import display_images
import mrcnn.model as modellib
from mrcnn.model import log

%matplotlib inline

# Directory to save logs and the trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Path to the COCO pre-trained weights file
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
# Download the COCO pre-trained weights if needed
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)

# Path to the weights trained on the Shapes dataset
SHAPES_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_shapes.h5")
2. Configuration
# Run one of the two code blocks below

# Shapes toy dataset
# import shapes
# config = shapes.ShapesConfig()

# MS COCO dataset
import coco
config = coco.CocoConfig()
COCO_DIR = "path/to/COCO dataset"

# At inference time, override a few of the training-time settings.
class InferenceConfig(config.__class__):
    # Run detection on one image at a time
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

config = InferenceConfig()
config.display()
3. Notebook Preferences
# Device to load the neural network on.
# Useful if you're training a model on the same machine:
# you can run this notebook on the CPU and leave the GPU for training.
DEVICE = "/cpu:0"  # /cpu:0 or /gpu:0

# Inspect the model in training or inference mode
# values: 'inference' or 'training'
# TODO: code for the 'training' test mode is not implemented yet
TEST_MODE = "inference"

def get_ax(rows=1, cols=1, size=16):
    """Return a Matplotlib Axes array to be used in
    all visualizations in the notebook. Provide a
    central point to control graph sizes.

    Adjust the size attribute to control how big to render images.
    """
    _, ax = plt.subplots(rows, cols, figsize=(size*cols, size*rows))
    return ax
4. Load the Validation Dataset
# Build the validation dataset
if config.NAME == "shapes":
    dataset = shapes.ShapesDataset()
    dataset.load_shapes(500, config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1])
elif config.NAME == "coco":
    dataset = coco.CocoDataset()
    dataset.load_coco(COCO_DIR, "minival")

# Must be called before using the dataset
dataset.prepare()
print("Images: {}\nClasses: {}".format(len(dataset.image_ids), dataset.class_names))
5. Load the Model
# Create a model in inference mode
with tf.device(DEVICE):
    model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR,
                              config=config)

# Set the path to the weights file
if config.NAME == "shapes":
    weights_path = SHAPES_MODEL_PATH
elif config.NAME == "coco":
    weights_path = COCO_MODEL_PATH
# Or uncomment the line below to load the last model you trained
# weights_path = model.find_last()

# Load weights
print("Loading weights ", weights_path)
model.load_weights(weights_path, by_name=True)
6. Run Detection
image_id = random.choice(dataset.image_ids)
image, image_meta, gt_class_id, gt_bbox, gt_mask =\
modellib.load_image_gt(dataset, config, image_id, use_mini_mask=False)
info = dataset.image_info[image_id]
print("image ID: {}.{} ({}) {}".format(info["source"], info["id"], image_id,
dataset.image_reference(image_id)))
# Run object detection
results = model.detect([image], verbose=1)

# Display results
ax = get_ax(1)
r = results[0]
visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],
dataset.class_names, r['scores'], ax=ax,
title="Predictions")
log("gt_class_id", gt_class_id)
log("gt_bbox", gt_bbox)
log("gt_mask", gt_mask)
image ID: coco.392144 (34940) http://cocodataset.org/#explore?id=392144
Processing 1 images
image shape: (1024, 1024, 3) min: 0.00000 max: 255.00000
molded_images shape: (1, 1024, 1024, 3) min: -123.70000 max: 151.10000
image_metas shape: (1, 89) min: 0.00000 max: 1024.00000
gt_class_id shape: (10,) min: 1.00000 max: 40.00000
gt_bbox shape: (10, 5) min: 0.00000 max: 1024.00000
gt_mask shape: (1024, 1024, 10) min: 0.00000 max: 1.00000
6.1 Precision-Recall
# Draw the precision-recall curve
AP, precisions, recalls, overlaps = utils.compute_ap(gt_bbox, gt_class_id, gt_mask,
r['rois'], r['class_ids'], r['scores'], r['masks'])
visualize.plot_precision_recall(AP, precisions, recalls)
# Grid of ground truth objects and their predictions
visualize.plot_overlaps(gt_class_id, r['class_ids'], r['scores'],
overlaps, dataset.class_names)
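For reference, the AP value above is the area under this precision-recall curve. A minimal re-implementation sketch of the VOC-style computation, assuming precisions and recalls are the score-ordered arrays that utils.compute_ap returns (np comes from the imports at the top):

def voc_ap_sketch(precisions, recalls):
    # Pad the curve so it spans recall 0 to 1
    p = np.concatenate([[0], precisions, [0]])
    r = np.concatenate([[0], recalls, [1]])
    # Make precision monotonically decreasing (right to left)
    for i in range(len(p) - 2, -1, -1):
        p[i] = max(p[i], p[i + 1])
    # Sum rectangle areas at the points where recall steps up
    idx = np.where(r[1:] != r[:-1])[0] + 1
    return np.sum((r[idx] - r[idx - 1]) * p[idx])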
6.2 Compute mAP @ IoU=50
# Compute VOC-style Average Precision
def compute_batch_ap(image_ids):
    APs = []
    for image_id in image_ids:
        # Load image
        image, image_meta, gt_class_id, gt_bbox, gt_mask =\
            modellib.load_image_gt(dataset, config,
                                   image_id, use_mini_mask=False)
        # Run object detection
        results = model.detect([image], verbose=0)
        # Compute AP
        r = results[0]
        AP, precisions, recalls, overlaps =\
            utils.compute_ap(gt_bbox, gt_class_id, gt_mask,
                             r['rois'], r['class_ids'], r['scores'], r['masks'])
        APs.append(AP)
    return APs

# Pick a set of random images
image_ids = np.random.choice(dataset.image_ids, 10)
APs = compute_batch_ap(image_ids)
print("mAP @ IoU=50: ", np.mean(APs))
7. Detection Step by Step
7.1 Region Proposal Network
The Region Proposal Network (RPN) runs a lightweight binary classifier on a large number of boxes (anchors) spread over the image and returns an object/no-object score for each. Anchors with high objectness scores (positive anchors) are passed to the next stage to be classified.

Even positive anchors rarely cover an object exactly, so the RPN also regresses a refinement: a shift and a resize to apply to each anchor to move it closer to the correct object boundary.
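The refinement is conventionally parameterized as a shift of the box center, in units of the box height and width, plus a resize on a log scale. A minimal sketch of how such deltas are applied, assuming the (dy, dx, log(dh), log(dw)) ordering used in this codebase (utils.apply_box_deltas, used below, is assumed to behave like this):

def apply_deltas_sketch(boxes, deltas):
    # boxes: [N, (y1, x1, y2, x2)], deltas: [N, (dy, dx, log(dh), log(dw))]
    h = boxes[:, 2] - boxes[:, 0]
    w = boxes[:, 3] - boxes[:, 1]
    cy = boxes[:, 0] + 0.5 * h
    cx = boxes[:, 1] + 0.5 * w
    # Shift the center in units of the box size; resize on a log scale
    cy = cy + deltas[:, 0] * h
    cx = cx + deltas[:, 1] * w
    h = h * np.exp(deltas[:, 2])
    w = w * np.exp(deltas[:, 3])
    # Convert back to corner coordinates
    return np.stack([cy - 0.5 * h, cx - 0.5 * w,
                     cy + 0.5 * h, cx + 0.5 * w], axis=1)

The log-scale parameterization keeps predicted sizes positive and makes the regression target roughly scale-invariant.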
7.1.1 RPN Targets
RPN targets are the training values for the RPN. To generate them, we start with a grid of anchors of different scales covering the whole image, then compute the IoU of each anchor with the ground-truth objects. Anchors with IoU >= 0.7 against a ground-truth object are positive anchors, and anchors with IoU < 0.3 against every object are negative. Anchors in between (IoU >= 0.3 but < 0.7) are neutral and excluded from training.

To train the RPN regressor, we also compute, for each positive anchor, the shift and resizing needed to make it cover the ground-truth object completely.
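A minimal sketch of this matching rule, assuming iou is an [anchors, gt_boxes] overlap matrix (the real modellib.build_rpn_targets used below also handles crowd boxes and subsamples the anchors to a fixed training batch):

def match_anchors_sketch(iou):
    match = np.zeros(iou.shape[0], dtype=np.int32)
    best_iou = iou.max(axis=1)   # best overlap of each anchor with any GT box
    match[best_iou < 0.3] = -1   # negative
    match[best_iou >= 0.7] = 1   # positive
    # Additionally, the best anchor for each GT box is forced positive,
    # so every object gets at least one anchor even below 0.7 IoU.
    match[np.argmax(iou, axis=0)] = 1
    return match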
# Generate RPN training targets
# target_rpn_match is 1 for positive anchors, -1 for negative anchors,
# and 0 for neutral anchors.
target_rpn_match, target_rpn_bbox = modellib.build_rpn_targets(
image.shape, model.anchors, gt_class_id, gt_bbox, model.config)
log("target_rpn_match", target_rpn_match)
log("target_rpn_bbox", target_rpn_bbox)
positive_anchor_ix = np.where(target_rpn_match[:] == 1)[0]
negative_anchor_ix = np.where(target_rpn_match[:] == -1)[0]
neutral_anchor_ix = np.where(target_rpn_match[:] == 0)[0]
positive_anchors = model.anchors[positive_anchor_ix]
negative_anchors = model.anchors[negative_anchor_ix]
neutral_anchors = model.anchors[neutral_anchor_ix]
log("positive_anchors", positive_anchors)
log("negative_anchors", negative_anchors)
log("neutral anchors", neutral_anchors)
# Apply refinement deltas to positive anchors
refined_anchors = utils.apply_box_deltas(
positive_anchors,
target_rpn_bbox[:positive_anchors.shape[0]] * model.config.RPN_BBOX_STD_DEV)
log("refined_anchors", refined_anchors, )
#顯示refinement (點)之前的positive anchors和refinement (線)之後的positive anchors.
visualize.draw_boxes(image, boxes=positive_anchors, refined_boxes=refined_anchors, ax=get_ax())
target_rpn_match shape: (65472,) min: -1.00000 max: 1.00000
target_rpn_bbox shape: (256, 4) min: -5.19860 max: 2.59641
positive_anchors shape: (14, 4) min: 5.49033 max: 973.25483
negative_anchors shape: (242, 4) min: -22.62742 max: 1038.62742
neutral anchors shape: (65216, 4) min: -362.03867 max: 1258.03867
refined_anchors shape: (14, 4) min: 0.00000 max: 1023.99994
7.1.2 RPN Predictions
# Run the RPN sub-graph
pillar = model.keras_model.get_layer("ROI").output # node to start searching from
nms_node = model.ancestor(pillar, "ROI/rpn_non_max_suppression:0")
# The NMS op name differs across TensorFlow versions, so fall back if needed
if nms_node is None:
    nms_node = model.ancestor(pillar, "ROI/rpn_non_max_suppression/NonMaxSuppressionV2:0")
rpn = model.run_graph([image], [
("rpn_class", model.keras_model.get_layer("rpn_class").output),
("pre_nms_anchors", model.ancestor(pillar, "ROI/pre_nms_anchors:0")),
("refined_anchors", model.ancestor(pillar, "ROI/refined_anchors:0")),
("refined_anchors_clipped", model.ancestor(pillar, "ROI/refined_anchors_clipped:0")),
("post_nms_anchor_ix", nms_node),
("proposals", model.keras_model.get_layer("ROI").output),
])
# Show the top-scoring anchors (before refinement)
limit = 100
sorted_anchor_ids = np.argsort(rpn['rpn_class'][:,:,1].flatten())[::-1]
visualize.draw_boxes(image, boxes=model.anchors[sorted_anchor_ids[:limit]], ax=get_ax())
# Show the anchors after refinement, then after clipping to the image boundaries
limit = 50
ax = get_ax(1, 2)
visualize.draw_boxes(image, boxes=rpn["pre_nms_anchors"][0, :limit],
refined_boxes=rpn["refined_anchors"][0, :limit], ax=ax[0])
visualize.draw_boxes(image, refined_boxes=rpn["refined_anchors_clipped"][0, :limit], ax=ax[1])
# Show the refined anchors that survived non-max suppression
limit = 50
ixs = rpn["post_nms_anchor_ix"][:limit]
visualize.draw_boxes(image, refined_boxes=rpn["refined_anchors_clipped"][0, ixs], ax=get_ax())
# Show the final proposals
# These are the same as the previous step (refined anchors after NMS),
# but with coordinates normalized to [0, 1].
limit = 50
# Convert back to image coordinates for display
h, w = config.IMAGE_SHAPE[:2]
proposals = rpn['proposals'][0, :limit] * np.array([h, w, h, w])
visualize.draw_boxes(image, refined_boxes=proposals, ax=get_ax())
# Measure the RPN recall (percent of objects covered by anchors)
# Here we measure recall in three ways:
# - All anchors
# - All refined anchors
# - Refined anchors after NMS
iou_threshold = 0.7
recall, positive_anchor_ids = utils.compute_recall(model.anchors, gt_bbox, iou_threshold)
print("All Anchors ({:5}) Recall: {:.3f} Positive anchors: {}".format(
model.anchors.shape[0], recall, len(positive_anchor_ids)))
recall, positive_anchor_ids = utils.compute_recall(rpn['refined_anchors'][0], gt_bbox, iou_threshold)
print("Refined Anchors ({:5}) Recall: {:.3f} Positive anchors: {}".format(
rpn['refined_anchors'].shape[1], recall, len(positive_anchor_ids)))
recall, positive_anchor_ids = utils.compute_recall(proposals, gt_bbox, iou_threshold)
print("Post NMS Anchors ({:5}) Recall: {:.3f} Positive anchors: {}".format(
proposals.shape[0], recall, len(positive_anchor_ids)))
All Anchors (65472) Recall: 0.400 Positive anchors: 8
Refined Anchors (10000) Recall: 0.900 Positive anchors: 65
Post NMS Anchors ( 50) Recall: 0.800 Positive anchors: 9
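A minimal sketch of the definition behind these numbers: recall is the fraction of ground-truth boxes covered by at least one box with IoU >= iou_threshold (utils.compute_recall also returns which of the given boxes were the positive ones):

def recall_sketch(boxes, gt_boxes, iou_threshold=0.7):
    # IoU of every box against every ground-truth box: [num_boxes, num_gt]
    overlaps = utils.compute_overlaps(boxes, gt_boxes)
    # A GT object counts as covered if some box overlaps it enough
    covered = overlaps.max(axis=0) >= iou_threshold
    return np.mean(covered)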
7.2 Proposal Classification
7.2.1 Proposal Classification
Run the classifier heads on the proposals to generate class probabilities and bounding box regressions.
# Get the inputs and outputs of the classifier and mask heads.
mrcnn = model.run_graph([image], [
("proposals", model.keras_model.get_layer("ROI").output),
("probs", model.keras_model.get_layer("mrcnn_class").output),
("deltas", model.keras_model.get_layer("mrcnn_bbox").output),
("masks", model.keras_model.get_layer("mrcnn_mask").output),
("detections", model.keras_model.get_layer("mrcnn_detection").output),
])
# Get detection class IDs. Trim zero padding.
# Each row of the detections tensor is (y1, x1, y2, x2, class_id, score),
# padded with zero rows up to the maximum detection count, so trim at the
# first class_id == 0.
det_class_ids = mrcnn['detections'][0, :, 4].astype(np.int32)
det_count = np.where(det_class_ids == 0)[0][0]
det_class_ids = det_class_ids[:det_count]
detections = mrcnn['detections'][0, :det_count]
print("{} detections: {}".format(
det_count, np.array(dataset.class_names)[det_class_ids]))
captions = ["{} {:.3f}".format(dataset.class_names[int(c)], s) if c > 0 else ""
for c, s in zip(detections[:, 4], detections[:, 5])]
visualize.draw_boxes(
image,
refined_boxes=utils.denorm_boxes(detections[:, :4], image.shape[:2]),
visibilities=[2] * len(detections),
captions=captions, title="Detections",
ax=get_ax())
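utils.denorm_boxes above converts boxes from normalized [0, 1] coordinates back to pixel coordinates. A minimal sketch of what it is assumed to do (scale by the image extent and shift the bottom-right corner by one pixel, matching the library's box-edge convention):

def denorm_boxes_sketch(boxes, shape):
    h, w = shape
    scale = np.array([h - 1, w - 1, h - 1, w - 1])
    shift = np.array([0, 0, 1, 1])
    return np.around(boxes * scale + shift).astype(np.int32)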
7.2.2 Detection Steps
# Proposals are in normalized coordinates. Scale them to image coordinates.
h, w = config.IMAGE_SHAPE[:2]
proposals = np.around(mrcnn["proposals"][0] * np.array([h, w, h, w])).astype(np.int32)
# Class ID, score, and mask of each proposal
roi_class_ids = np.argmax(mrcnn["probs"][0], axis=1)
roi_scores = mrcnn["probs"][0, np.arange(roi_class_ids.shape[0]), roi_class_ids]
roi_class_names = np.array(dataset.class_names)[roi_class_ids]
roi_positive_ixs = np.where(roi_class_ids > 0)[0]
# How many ROIs and how many empty rows?
print("{} Valid proposals out of {}".format(np.sum(np.any(proposals, axis=1)), proposals.shape[0]))
print("{} Positive ROIs".format(len(roi_positive_ixs)))
# Class counts
print(list(zip(*np.unique(roi_class_names, return_counts=True))))
# Display a random sample of proposals.
# Proposals classified as background are dotted; the rest show
# their class name and confidence score.
limit = 200
ixs = np.random.randint(0, proposals.shape[0], limit)
captions = ["{} {:.3f}".format(dataset.class_names[c], s) if c > 0 else ""
for c, s in zip(roi_class_ids[ixs], roi_scores[ixs])]
visualize.draw_boxes(image, boxes=proposals[ixs],
visibilities=np.where(roi_class_ids[ixs] > 0, 2, 1),
captions=captions, title="ROIs Before Refinement",
ax=get_ax())
Apply bounding box refinement.
# Class-specific bounding box shifts.
# mrcnn_bbox predicts one set of deltas per class, so for each proposal
# pick the deltas of its predicted class.
roi_bbox_specific = mrcnn["deltas"][0, np.arange(proposals.shape[0]), roi_class_ids]
log("roi_bbox_specific", roi_bbox_specific)
# Apply bounding box transformations
# Shape: [N, (y1, x1, y2, x2)]
refined_proposals = utils.apply_box_deltas(
proposals, roi_bbox_specific * config.BBOX_STD_DEV).astype(np.int32)
log("refined_proposals", refined_proposals)
# Show positive proposals
# ids = np.arange(roi_boxes.shape[0])  # Display all
limit = 5
ids = np.random.randint(0, len(roi_positive_ixs), limit)  # Display random sample
captions = ["{} {:.3f}".format(dataset.class_names[c], s) if c > 0 else ""
for c, s in zip(roi_class_ids[roi_positive_ixs][ids], roi_scores[roi_positive_ixs][ids])]
visualize.draw_boxes(image, boxes=proposals[roi_positive_ixs][ids],
refined_boxes=refined_proposals[roi_positive_ixs][ids],
visibilities=np.where(roi_class_ids[roi_positive_ixs][ids] > 0, 1, 0),
captions=captions, title="ROIs After Refinement",
ax=get_ax())
roi_bbox_specific shape: (1000, 4) min: -2.44748 max: 2.94838
refined_proposals shape: (1000, 4) min: -8.00000 max: 1028.00000
Filter out low-confidence detections.
# Remove boxes classified as background
keep = np.where(roi_class_ids > 0)[0]
print("Keep {} detections:\n{}".format(keep.shape[0], keep))
# Remove low-confidence detections
keep = np.intersect1d(keep, np.where(roi_scores >= config.DETECTION_MIN_CONFIDENCE)[0])
print("Remove boxes below {} confidence. Keep {}:\n{}".format(
config.DETECTION_MIN_CONFIDENCE, keep.shape[0], keep))
Apply non-max suppression on a per-class basis.
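utils.non_max_suppression, used in the loop below, implements the standard greedy algorithm. A minimal sketch of the idea, assuming boxes in (y1, x1, y2, x2) format:

def nms_sketch(boxes, scores, threshold):
    order = scores.argsort()[::-1]   # indices sorted by descending score
    keep = []
    while order.size > 0:
        top, rest = order[0], order[1:]
        keep.append(top)
        # IoU of the top-scoring box against the remaining boxes
        y1 = np.maximum(boxes[top, 0], boxes[rest, 0])
        x1 = np.maximum(boxes[top, 1], boxes[rest, 1])
        y2 = np.minimum(boxes[top, 2], boxes[rest, 2])
        x2 = np.minimum(boxes[top, 3], boxes[rest, 3])
        inter = np.maximum(y2 - y1, 0) * np.maximum(x2 - x1, 0)
        area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        iou = inter / (area[top] + area[rest] - inter)
        # Drop boxes that overlap the kept box too heavily
        order = rest[iou <= threshold]
    return np.array(keep, dtype=np.int32)

Running it separately per class, as below, means boxes of different classes never suppress each other.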
# Apply per-class non-max suppression
pre_nms_boxes = refined_proposals[keep]
pre_nms_scores = roi_scores[keep]
pre_nms_class_ids = roi_class_ids[keep]
nms_keep = []
for class_id in np.unique(pre_nms_class_ids):
    # Pick the detections of this class
    ixs = np.where(pre_nms_class_ids == class_id)[0]
    # Apply NMS
    class_keep = utils.non_max_suppression(pre_nms_boxes[ixs],
                                           pre_nms_scores[ixs],
                                           config.DETECTION_NMS_THRESHOLD)
    # Map indices back into the keep array
    class_keep = keep[ixs[class_keep]]
    nms_keep = np.union1d(nms_keep, class_keep)
    print("{:22}: {} -> {}".format(dataset.class_names[class_id][:20],
                                   keep[ixs], class_keep))
keep = np.intersect1d(keep, nms_keep).astype(np.int32)
print("\nKept after per-class NMS: {}\n{}".format(keep.shape[0], keep))
# Show the final detections
ixs = np.arange(len(keep)) # Display all
# ixs = np.random.randint(0, len(keep), 10) # Display random sample
captions = ["{} {:.3f}".format(dataset.class_names[c], s) if c > 0 else ""
for c, s in zip(roi_class_ids[keep][ixs], roi_scores[keep][ixs])]
visualize.draw_boxes(
image, boxes=proposals[keep][ixs],
refined_boxes=refined_proposals[keep][ixs],
visibilities=np.where(roi_class_ids[keep][ixs] > 0, 1, 0),
captions=captions, title="Detections after NMS",
ax=get_ax())
7.3 Generating Masks
This stage takes the detections (refined bounding boxes and class IDs) from the previous layer and runs the mask branch on them to generate a segmentation mask for every instance.
7.3.1 Mask Targets
# Display the ground-truth masks, which are the training targets of the mask branch
display_images(np.transpose(gt_mask, [2, 0, 1]), cmap="Blues")
7.3.2 Predicted Masks
# Get predictions from the mask head
mrcnn = model.run_graph([image], [
("detections", model.keras_model.get_layer("mrcnn_detection").output),
("masks", model.keras_model.get_layer("mrcnn_mask").output),
])
# Get detection class IDs. Trim zero padding.
det_class_ids = mrcnn['detections'][0, :, 4].astype(np.int32)
det_count = np.where(det_class_ids == 0)[0][0]
det_class_ids = det_class_ids[:det_count]
print("{} detections: {}".format(
det_count, np.array(dataset.class_names)[det_class_ids]))
# Masks
det_boxes = utils.denorm_boxes(mrcnn["detections"][0, :, :4], image.shape[:2])
det_mask_specific = np.array([mrcnn["masks"][0, i, :, :, c]
for i, c in enumerate(det_class_ids)])
det_masks = np.array([utils.unmold_mask(m, det_boxes[i], image.shape)
for i, m in enumerate(det_mask_specific)])
log("det_mask_specific", det_mask_specific)
log("det_masks", det_masks)
display_images(det_mask_specific[:4] * 255, cmap="Blues", interpolation="none")
display_images(det_masks[:4] * 255, cmap="Blues", interpolation="none")
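utils.unmold_mask above turns each small soft mask (28x28 with this configuration) into a full-image binary mask. A minimal sketch of that conversion, assuming the usual resize-threshold-paste approach and a box in pixel coordinates:

def unmold_mask_sketch(mask, box, image_shape, threshold=0.5):
    import skimage.transform
    y1, x1, y2, x2 = box
    # Resize the soft mask to the size of its detection box
    m = skimage.transform.resize(mask, (y2 - y1, x2 - x1), order=1)
    # Binarize
    m = np.where(m >= threshold, 1, 0).astype(np.bool_)
    # Paste into an empty full-size canvas at the box location
    full_mask = np.zeros(image_shape[:2], dtype=np.bool_)
    full_mask[y1:y2, x1:x2] = m
    return full_mask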
8. Visualize Activations
It often helps to look at the activations of different layers to catch issues and odd patterns.
# Get activations of a few sample layers
activations = model.run_graph([image], [
("input_image", model.keras_model.get_layer("input_image").output),
("res4w_out", model.keras_model.get_layer("res4w_out").output), # for resnet100
("rpn_bbox", model.keras_model.get_layer("rpn_bbox").output),
("roi", model.keras_model.get_layer("ROI").output),
])
# Input image (normalized)
_ = plt.imshow(modellib.unmold_image(activations["input_image"][0], config))
# Backbone feature map
display_images(np.transpose(activations["res4w_out"][0,:,:,:4], [2, 0, 1]))
# Histograms of RPN bounding box deltas
plt.figure(figsize=(12, 3))
plt.subplot(1, 4, 1)
plt.title("dy")
_ = plt.hist(activations["rpn_bbox"][0,:,0], 50)
plt.subplot(1, 4, 2)
plt.title("dx")
_ = plt.hist(activations["rpn_bbox"][0,:,1], 50)
plt.subplot(1, 4, 3)
plt.title("dw")
_ = plt.hist(activations["rpn_bbox"][0,:,2], 50)
plt.subplot(1, 4, 4)
plt.title("dh")
_ = plt.hist(activations["rpn_bbox"][0,:,3], 50)
# Distribution of y, x coordinates of the generated proposals
plt.figure(figsize=(10, 5))
plt.subplot(1, 2, 1)
plt.title("y1, x1")
plt.scatter(activations["roi"][0,:,0], activations["roi"][0,:,1])
plt.subplot(1, 2, 2)
plt.title("y2, x2")
plt.scatter(activations["roi"][0,:,2], activations["roi"][0,:,3])
plt.show()