A First Look at the TensorFlow Object Detection API Source Code (1): FeatureExtractor
阿新 · Published: 2019-01-02
models/research/object_detection/models/faster_rcnn_resnet_v1_feature_extractor.py
models/research/object_detection/meta_architectures/faster_rcnn_meta_arch.py
class FasterRCNNFeatureExtractor (the abstract base class, defined in faster_rcnn_meta_arch.py)
__init__
def __init__(self,
             is_training,
             first_stage_features_stride,
             batch_norm_trainable=False,
             reuse_weights=None,
             weight_decay=0.0):
  self._is_training = is_training
  self._first_stage_features_stride = first_stage_features_stride
  self._train_batch_norm = (batch_norm_trainable and is_training)
  self._reuse_weights = reuse_weights
  self._weight_decay = weight_decay
preprocess
@abstractmethod
def preprocess(self, resized_inputs):
  """Feature-extractor specific preprocessing (minus image resizing)."""
  pass
extract_proposal_features
def extract_proposal_features(self, preprocessed_inputs, scope):
  """Extracts first stage RPN features.

  This function is responsible for extracting feature maps from preprocessed
  images. These features are used by the region proposal network (RPN) to
  predict proposals.

  Args:
    preprocessed_inputs: A [batch, height, width, channels] float tensor
      representing a batch of images.
    scope: A scope name.

  Returns:
    rpn_feature_map: A tensor with shape [batch, height, width, depth]
  """
  with tf.variable_scope(scope, values=[preprocessed_inputs]):
    return self._extract_proposal_features(preprocessed_inputs, scope)
_extract_proposal_features
@abstractmethod
def _extract_proposal_features(self, preprocessed_inputs, scope):
  """Extracts first stage RPN features, to be overridden."""
  pass
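The base class is a template: the public extract_proposal_features opens the variable scope and delegates to the abstract _extract_proposal_features, which each concrete extractor overrides. A minimal sketch of that pattern (ToyFeatureExtractor and its single conv layer are made up for illustration, not part of the API):

import tensorflow as tf

class ToyFeatureExtractor(object):
  """Hypothetical extractor illustrating the template-method pattern."""

  def extract_proposal_features(self, preprocessed_inputs, scope):
    # Public entry point: open the variable scope, then delegate to the
    # subclass-specific override.
    with tf.variable_scope(scope, values=[preprocessed_inputs]):
      return self._extract_proposal_features(preprocessed_inputs, scope)

  def _extract_proposal_features(self, preprocessed_inputs, scope):
    # Concrete extractors build their backbone here; a single conv layer
    # stands in for ResNet in this toy example.
    return tf.layers.conv2d(preprocessed_inputs, 64, 3, padding='same',
                            name='toy_conv')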
A few other functions are omitted here; for details, see models/research/object_detection/meta_architectures/faster_rcnn_meta_arch.py.
class FasterRCNNResnetV1FeatureExtractor(faster_rcnn_meta_arch.FasterRCNNFeatureExtractor)
def __init__(self,
             architecture,
             resnet_model,
             is_training,
             first_stage_features_stride,
             batch_norm_trainable=False,
             reuse_weights=None,
             weight_decay=0.0):
  if first_stage_features_stride != 8 and first_stage_features_stride != 16:
    raise ValueError('`first_stage_features_stride` must be 8 or 16.')
  self._architecture = architecture
  self._resnet_model = resnet_model
  super(FasterRCNNResnetV1FeatureExtractor, self).__init__(
      is_training, first_stage_features_stride, batch_norm_trainable,
      reuse_weights, weight_decay)
preprocess
def preprocess(self, resized_inputs):
  channel_means = [123.68, 116.779, 103.939]
  return resized_inputs - [[channel_means]]
This is the VGG-style preprocessing (per-channel mean subtraction); it is invoked later in the pipeline.
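As a quick sanity check of the broadcasting: [[channel_means]] has shape (1, 1, 3), so subtracting it from a [batch, height, width, 3] tensor removes the per-channel mean. A minimal NumPy sketch (the dummy batch values are made up for illustration):

import numpy as np

# Dummy 1x2x2x3 image batch with every pixel set to 150 (illustration only).
images = np.full((1, 2, 2, 3), 150.0, dtype=np.float32)
channel_means = [123.68, 116.779, 103.939]

# [[channel_means]] has shape (1, 1, 3); broadcasting applies it across
# batch, height and width, exactly as in preprocess() above.
centered = images - [[channel_means]]
print(centered[0, 0, 0])  # -> [26.32  33.221  46.061]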
_extract_proposal_features
Extracts the feature map that is fed to the RPN.
def _extract_proposal_features(self, preprocessed_inputs, scope):
  # The resized image must be at least 33x33, otherwise the ResNet
  # feature map would be empty.
  shape_assert = tf.Assert(
      tf.logical_and(
          tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
          tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
      ['image size must at least be 33 in both height and width.'])

  with tf.control_dependencies([shape_assert]):
    # Disables batchnorm for fine-tuning with smaller batch sizes.
    # TODO: Figure out if it is needed when image
    # batch size is bigger.
    with slim.arg_scope(
        resnet_utils.resnet_arg_scope(
            batch_norm_epsilon=1e-5,
            batch_norm_scale=True,
            weight_decay=self._weight_decay)):
      with tf.variable_scope(
          self._architecture, reuse=self._reuse_weights) as var_scope:
        _, activations = self._resnet_model(
            preprocessed_inputs,
            num_classes=None,
            is_training=self._train_batch_norm,
            global_pool=False,
            output_stride=self._first_stage_features_stride,
            spatial_squeeze=False,
            scope=var_scope)

  handle = scope + '/%s/block3' % self._architecture
  return activations[handle]
A few details to note here:
Detail 1: tf.Assert
Checks that the input image is at least 33×33 in height and width.
Detail 2: tf.control_dependencies
Ensures the tf.Assert op runs before anything created inside the block; see the sketch after this list.
Detail 3: the "block3" in the second-to-last line
The output of _extract_proposal_features is the output of ResNet-101's block3, i.e. the first-stage (RPN) feature map stops before block4.
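A minimal, self-contained sketch of the tf.Assert / tf.control_dependencies pattern used above (the 33-pixel threshold mirrors the source; the toy graph and input shapes are assumptions for illustration):

import numpy as np
import tensorflow as tf  # TF1-style graph mode, as used by the API

images = tf.placeholder(tf.float32, shape=[None, None, None, 3])

# The assert op raises InvalidArgumentError at run time if the condition
# evaluates to False.
shape_assert = tf.Assert(
    tf.logical_and(
        tf.greater_equal(tf.shape(images)[1], 33),
        tf.greater_equal(tf.shape(images)[2], 33)),
    ['image size must at least be 33 in both height and width.'])

# Ops created inside this block get a control edge from shape_assert,
# so the size check always runs before the "backbone".
with tf.control_dependencies([shape_assert]):
  features = tf.identity(images)  # stands in for the ResNet feature map

with tf.Session() as sess:
  ok = np.zeros((1, 64, 64, 3), np.float32)         # passes the check
  sess.run(features, feed_dict={images: ok})
  too_small = np.zeros((1, 10, 10, 3), np.float32)  # triggers the assert
  try:
    sess.run(features, feed_dict={images: too_small})
  except tf.errors.InvalidArgumentError:
    print('shape_assert fired for a 10x10 input')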
_extract_box_classifier_features
Extracts the features fed to the second-stage box classifier.
def _extract_box_classifier_features(self, proposal_feature_maps, scope):
  with tf.variable_scope(self._architecture, reuse=self._reuse_weights):
    with slim.arg_scope(
        resnet_utils.resnet_arg_scope(
            batch_norm_epsilon=1e-5,
            batch_norm_scale=True,
            weight_decay=self._weight_decay)):
      with slim.arg_scope([slim.batch_norm],
                          is_training=self._train_batch_norm):
        blocks = [
            resnet_utils.Block('block4', resnet_v1.bottleneck, [{
                'depth': 2048,
                'depth_bottleneck': 512,
                'stride': 1
            }] * 3)
        ]
        proposal_classifier_features = resnet_utils.stack_blocks_dense(
            proposal_feature_maps, blocks)
  return proposal_classifier_features
slim.arg_scope and resnet_utils.resnet_arg_scope (see the slim documentation and the resnet_utils file for details).
After looking them up: they establish default arguments for slim's conv2d, batch_norm and max_pool2d, so those defaults don't have to be repeated at every call site; a small sketch follows.
The output of this function is the _extract_proposal_features output (block3) passed through one additional block4 (3 bottleneck units).
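A minimal sketch of what slim.arg_scope does (the layer sizes here are made up for illustration): it pushes default keyword arguments onto the listed ops, which is how resnet_arg_scope configures conv2d and batch_norm in the code above.

import tensorflow as tf
import tensorflow.contrib.slim as slim  # TF1; newer setups use `import tf_slim as slim`

inputs = tf.placeholder(tf.float32, [1, 224, 224, 3])

# Every slim.conv2d inside this scope inherits these defaults unless it
# overrides them explicitly.
with slim.arg_scope([slim.conv2d],
                    weights_regularizer=slim.l2_regularizer(1e-4),
                    normalizer_fn=slim.batch_norm,
                    activation_fn=tf.nn.relu):
  net = slim.conv2d(inputs, 64, [3, 3], scope='conv1')   # uses the defaults
  net = slim.conv2d(net, 128, [3, 3], scope='conv2')     # same defaults
  logits = slim.conv2d(net, 10, [1, 1], activation_fn=None,
                       scope='logits')                    # overrides activation_fn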
class FasterRCNNResnet101FeatureExtractor(FasterRCNNResnetV1FeatureExtractor)
def __init__(self,
             is_training,
             first_stage_features_stride,
             batch_norm_trainable=False,
             reuse_weights=None,
             weight_decay=0.0):
  super(FasterRCNNResnet101FeatureExtractor, self).__init__(
      'resnet_v1_101', resnet_v1.resnet_v1_101, is_training,
      first_stage_features_stride, batch_norm_trainable,
      reuse_weights, weight_decay)
It simply initializes the parent class FasterRCNNResnetV1FeatureExtractor with fixed arguments:
{
    architecture: 'resnet_v1_101',
    resnet_model: resnet_v1.resnet_v1_101
}
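To close the loop, a hedged usage sketch (the input size, stride and scope name are example choices, and the import assumes the standard models/research layout is on PYTHONPATH):

import tensorflow as tf
from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_res

# Example configuration: stride 16, batch norm frozen (assumptions).
extractor = frcnn_res.FasterRCNNResnet101FeatureExtractor(
    is_training=False,
    first_stage_features_stride=16,
    batch_norm_trainable=False,
    weight_decay=0.0)

images = tf.placeholder(tf.float32, [1, 600, 600, 3])
preprocessed = extractor.preprocess(images)            # per-channel mean subtraction
rpn_features = extractor.extract_proposal_features(    # block3 output at stride 16
    preprocessed, scope='FirstStageFeatureExtractor')
print(rpn_features.shape)  # roughly [1, 38, 38, 1024] for a 600x600 input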