1. 程式人生 > >Tensorflow使用的預訓練的resnet_v2_50,resnet_v2_101,resnet_v2_152等模型預測,訓練

Tensorflow使用的預訓練的resnet_v2_50,resnet_v2_101,resnet_v2_152等模型預測,訓練

你要的答案或許都在這裡

自己搭建的一個框架,包含模型有:vgg(vgg16,vgg19), resnet(resnet_v2_50,resnet_v2_101,resnet_v2_152), inception_v4, inception_resnet_v2等。

此框架主要針對分類任務, 後面會陸續搭建多工多標籤、檢測、以及rnn等框架,歡迎關注。


下面是以resnet_v2_101為例:

此處將nets中的resnet_utils,合併一起了。

resnet_v2.py

#coding:utf-8
#匯入對應的庫

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import tensorflow as tf
slim = tf.contrib.slim



class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])):
  """A named tuple describing a ResNet block.

  Its parts are:
    scope: The scope of the `Block`.
    unit_fn: The ResNet unit function which takes as input a `Tensor` and
      returns another `Tensor` with the output of the ResNet unit.
    args: A list of length equal to the number of units in the `Block`. Each
      element is a dict of keyword arguments for one call to unit_fn; the
      blocks built by resnet_v2_block use the keys 'depth',
      'depth_bottleneck' and 'stride'.
  """

  # Keep instances as plain tuples: without this the subclass would grow a
  # per-instance __dict__ that the underlying namedtuple does not have.
  __slots__ = ()


def subsample(inputs, factor, scope=None):
  """Reduces the spatial resolution of `inputs` by `factor`.

  Args:
    inputs: A `Tensor` of size [batch, height_in, width_in, channels].
    factor: The subsampling factor.
    scope: Optional variable_scope.

  Returns:
    `inputs` itself when factor == 1; otherwise the result of a 1x1 max-pool
    applied with stride `factor`.
  """
  # Anything other than a factor of one goes through a strided 1x1 max-pool.
  if factor != 1:
    return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)
  # A factor of one is a no-op: hand the tensor back untouched.
  return inputs


def conv2d_same(inputs, num_outputs, kernel_size, stride, rate=1, scope=None):
  """Strided 2-D convolution that pads explicitly instead of using 'SAME'.

  For stride == 1 this is a plain slim.conv2d with 'SAME' padding. For any
  other stride the input is zero-padded by hand along height and width and the
  convolution is then run with 'VALID' padding.

  Note that
     net = conv2d_same(inputs, num_outputs, 3, stride=stride)
  is equivalent to
     net = slim.conv2d(inputs, num_outputs, 3, stride=1, padding='SAME')
     net = subsample(net, factor=stride)
  whereas
     net = slim.conv2d(inputs, num_outputs, 3, stride=stride, padding='SAME')
  is different when the input's height or width is even, which is why this
  function exists.

  Args:
    inputs: A 4-D tensor of size [batch, height_in, width_in, channels].
    num_outputs: An integer, the number of output filters.
    kernel_size: An int with the kernel_size of the filters.
    stride: An integer, the output stride.
    rate: An integer, rate for atrous convolution.
    scope: Scope.

  Returns:
    output: A 4-D tensor of size [batch, height_out, width_out, channels] with
      the convolution output.
  """
  if stride != 1:
    # Footprint of the (possibly dilated) kernel along one spatial axis.
    effective_kernel = kernel_size + (kernel_size - 1) * (rate - 1)
    total_padding = effective_kernel - 1
    # Split the padding as evenly as possible; the extra pixel goes at the end.
    pad_before = total_padding // 2
    pad_after = total_padding - pad_before
    paddings = [[0, 0],
                [pad_before, pad_after],
                [pad_before, pad_after],
                [0, 0]]
    padded = tf.pad(inputs, paddings)
    return slim.conv2d(padded, num_outputs, kernel_size, stride=stride,
                       rate=rate, padding='VALID', scope=scope)

  return slim.conv2d(inputs, num_outputs, kernel_size, stride=1, rate=rate,
                     padding='SAME', scope=scope)


@slim.add_arg_scope
def stack_blocks_dense(net, blocks, output_stride=None,
                       outputs_collections=None):
  """Chains the units of every ResNet `Block` while capping the output stride.

  Each unit runs inside a variable scope of the form 'block_name/unit_1',
  'block_name/unit_2', etc. While the accumulated stride is different from the
  requested `output_stride`, a unit is applied with its own stride and atrous
  rate 1. Once the accumulated stride equals `output_stride`, every further
  unit is applied with stride 1 and with an atrous rate equal to the product
  of the strides that have been suppressed so far.

  Args:
    net: A `Tensor` of size [batch, height, width, channels].
    blocks: A list of length equal to the number of ResNet `Blocks`. Each
      element is a ResNet `Block` object describing the units in the `Block`.
    output_stride: If `None`, then the output will be computed at the nominal
      network stride. If output_stride is not `None`, it specifies the requested
      ratio of input to output spatial resolution, which needs to be equal to
      the product of unit strides from the start up to some level of the ResNet.
      For example, if the ResNet employs units with strides 1, 2, 1, 3, 4, 1,
      then valid values for the output_stride are 1, 2, 6, 24 or None (which
      is equivalent to output_stride=24).
    outputs_collections: Collection to add the ResNet block outputs.

  Returns:
    net: Output tensor with stride equal to the specified output_stride.

  Raises:
    ValueError: If the target output_stride is not valid.
  """
  # Product of the strides actually applied to the activations so far.
  effective_stride = 1
  # Dilation rate handed to units once striding has been switched off.
  atrous_rate = 1
  stride_is_capped = output_stride is not None

  for block in blocks:
    with tf.variable_scope(block.scope, 'block', [net]) as block_scope:
      for index, unit_kwargs in enumerate(block.args):
        # Overshooting the target means it is not a product of a prefix of
        # the unit strides.
        if stride_is_capped and effective_stride > output_stride:
          raise ValueError('The target output_stride cannot be reached.')

        with tf.variable_scope('unit_%d' % (index + 1), values=[net]):
          if stride_is_capped and effective_stride == output_stride:
            # Target reached: run the unit unstrided but dilated, and fold the
            # stride it would have used into the rate for later units.
            unstrided_kwargs = dict(unit_kwargs, stride=1)
            net = block.unit_fn(net, rate=atrous_rate, **unstrided_kwargs)
            atrous_rate *= unit_kwargs.get('stride', 1)
          else:
            net = block.unit_fn(net, rate=1, **unit_kwargs)
            effective_stride *= unit_kwargs.get('stride', 1)
      net = slim.utils.collect_named_outputs(outputs_collections,
                                             block_scope.name, net)

  if stride_is_capped and effective_stride != output_stride:
    raise ValueError('The target output_stride cannot be reached.')

  return net


def resnet_arg_scope(weight_decay=0.0001,
                     batch_norm_decay=0.997,
                     batch_norm_epsilon=1e-5,
                     batch_norm_scale=True,
                     activation_fn=tf.nn.relu,
                     use_batch_norm=True):
  """Builds the default slim arg scope for the ResNet models.

  The scope sets defaults for slim.conv2d (L2 regularizer, variance-scaling
  initializer, activation and optional batch-norm normalizer), for
  slim.batch_norm (decay, epsilon, scale, update collection) and for
  slim.max_pool2d (padding='SAME').

  Args:
    weight_decay: The weight decay to use for regularizing the model.
    batch_norm_decay: The moving average decay when estimating layer activation
      statistics in batch normalization.
    batch_norm_epsilon: Small constant to prevent division by zero when
      normalizing activations by their variance in batch normalization.
    batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the
      activations in the batch normalization layer.
    activation_fn: The activation function which is used in ResNet.
    use_batch_norm: Whether or not to use batch normalization.

  Returns:
    An `arg_scope` to use for the resnet models.
  """
  bn_kwargs = dict(
      decay=batch_norm_decay,
      epsilon=batch_norm_epsilon,
      scale=batch_norm_scale,
      updates_collections=tf.GraphKeys.UPDATE_OPS,
  )
  conv_normalizer = slim.batch_norm if use_batch_norm else None

  with slim.arg_scope(
      [slim.conv2d],
      weights_regularizer=slim.l2_regularizer(weight_decay),
      weights_initializer=slim.variance_scaling_initializer(),
      activation_fn=activation_fn,
      normalizer_fn=conv_normalizer,
      normalizer_params=bn_kwargs):
    with slim.arg_scope([slim.batch_norm], **bn_kwargs):
      # pool1 gets padding='SAME' through this default. Callers who want
      # 'VALID' padding instead can override it with
      # slim.arg_scope([slim.max_pool2d], padding='VALID').
      with slim.arg_scope([slim.max_pool2d], padding='SAME') as scope_defaults:
        return scope_defaults




@slim.add_arg_scope
def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1,
               outputs_collections=None, scope=None):
  """Pre-activation bottleneck residual unit (batch norm before the convs).

  The input is batch-normalized and ReLU-activated once ('preact'). The
  residual branch is a 1x1 conv, a 3x3 conv carrying the stride and atrous
  rate, and a final 1x1 conv with no normalizer or activation. The shortcut is
  the (subsampled) raw input when the channel count already equals `depth`,
  otherwise a 1x1 strided conv of the pre-activated input.

  Args:
    inputs: A tensor of size [batch, height, width, channels].
    depth: The depth of the ResNet unit output.
    depth_bottleneck: The depth of the bottleneck layers.
    stride: The ResNet unit's stride. Determines the amount of downsampling of
      the units output compared to its input.
    rate: An integer, rate for atrous convolution.
    outputs_collections: Collection to add the ResNet unit output.
    scope: Optional variable_scope.

  Returns:
    The ResNet unit's output.
  """
  with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as unit_scope:
    in_channels = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
    preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu, scope='preact')

    if depth != in_channels:
      # Channel counts differ: project the pre-activated input to `depth`.
      shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride,
                             normalizer_fn=None, activation_fn=None,
                             scope='shortcut')
    else:
      # Same channel count: identity shortcut, subsampled to match the stride.
      shortcut = subsample(inputs, stride, 'shortcut')

    branch = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1,
                         scope='conv1')
    branch = conv2d_same(branch, depth_bottleneck, 3, stride,
                         rate=rate, scope='conv2')
    branch = slim.conv2d(branch, depth, [1, 1], stride=1,
                         normalizer_fn=None, activation_fn=None,
                         scope='conv3')

    unit_output = shortcut + branch

    return slim.utils.collect_named_outputs(outputs_collections,
                                            unit_scope.original_name_scope,
                                            unit_output)


def resnet_v2(inputs,
              blocks,
              num_classes=None,
              is_training=True,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              spatial_squeeze=True,
              reuse=None,
              scope=None):
  """Generator for v2 (preactivation) ResNet models.
  This function generates a family of ResNet v2 models. See the resnet_v2_*()
  methods for specific model instantiations, obtained by selecting different
  block instantiations that produce ResNets of various depths.
  Training for image classification on Imagenet is usually done with [224, 224]
  inputs, resulting in [7, 7] feature maps at the output of the last ResNet
  block for the ResNets defined in [1] that have nominal stride equal to 32.
  However, for dense prediction tasks we advise that one uses inputs with
  spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In
  this case the feature maps at the ResNet output will have spatial shape
  [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]
  and corners exactly aligned with the input image corners, which greatly
  facilitates alignment of the features to the image. Using as input [225, 225]
  images results in [8, 8] feature maps at the output of the last ResNet block.
  For dense prediction tasks, the ResNet needs to run in fully-convolutional
  (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all
  have nominal stride equal to 32 and a good choice in FCN mode is to use
  output_stride=16 in order to increase the density of the computed features at
  small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915.
  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels].
    blocks: A list of length equal to the number of ResNet blocks. Each element
      is a `Block` object describing the units in the block.
    num_classes: Number of predicted classes for classification tasks. If None
      we return the features before the logit layer.
    is_training: whether is training or not.
    global_pool: If True, we perform global average pooling before computing the
      logits. Set to True for image classification, False for dense prediction.
    output_stride: If None, then the output will be computed at the nominal
      network stride. If output_stride is not None, it specifies the requested
      ratio of input to output spatial resolution. When include_root_block is
      True it must be a multiple of 4.
    include_root_block: If True, include the initial convolution followed by
      max-pooling, if False excludes it. If excluded, `inputs` should be the
      results of an activation-less convolution.
    spatial_squeeze: if True, logits is of shape [B, C], if false logits is
        of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
        To use this parameter, the input images must be smaller than 300x300
        pixels, in which case the output logit layer does not contain spatial
        information and can be removed.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.
  Returns:
    net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
      If global_pool is False, then height_out and width_out are reduced by a
      factor of output_stride compared to the respective height_in and width_in,
      else both height_out and width_out equal one. If num_classes is None, then
      net is the output of the last ResNet block, potentially after global
      average pooling. If num_classes is not None, net contains the pre-softmax
      activations.
    end_points: A dictionary from components of the network to the corresponding
      activation.
  Raises:
    ValueError: If the target output_stride is not valid.
  """
  with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc:
    end_points_collection = sc.name + '_end_points'
    with slim.arg_scope([slim.conv2d, bottleneck,
                         stack_blocks_dense],
                        outputs_collections=end_points_collection):
      with slim.arg_scope([slim.batch_norm], is_training=is_training):
        net = inputs
        if include_root_block:
          if output_stride is not None:
            if output_stride % 4 != 0:
              raise ValueError('The output_stride needs to be a multiple of 4.')
            # The root block below (stride-2 conv1 + stride-2 pool1) already
            # downsamples by 4. Floor division keeps the remaining stride an
            # int; `/=` would yield a float because this file enables
            # `from __future__ import division`.
            output_stride //= 4
          # We do not include batch normalization or activation functions in
          # conv1 because the first ResNet unit will perform these. Cf.
          # Appendix of [2].
          with slim.arg_scope([slim.conv2d],
                              activation_fn=None, normalizer_fn=None):
            net = conv2d_same(net, 64, 7, stride=2, scope='conv1')
          net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
        net = stack_blocks_dense(net, blocks, output_stride)
        # This is needed because the pre-activation variant does not have batch
        # normalization or activation functions in the residual unit output. See
        # Appendix of [2].
        net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm')
        if global_pool:
          # Global average pooling.
          net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)
        if num_classes is not None:
          net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                            normalizer_fn=None, scope='logits')
          if spatial_squeeze:
            net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
        # Convert end_points_collection into a dictionary of end_points.
        end_points = slim.utils.convert_collection_to_dict(
            end_points_collection)
        if num_classes is not None:
          end_points['predictions'] = slim.softmax(net, scope='predictions')
        return net, end_points
resnet_v2.default_image_size = 224


def resnet_v2_block(scope, base_depth, num_units, stride):
  """Helper function for creating a resnet_v2 bottleneck block.

  Args:
    scope: The scope of the block.
    base_depth: The depth of the bottleneck layer for each unit.
    num_units: The number of units in the block.
    stride: The stride of the block, implemented as a stride in the last unit.
      All other units have stride=1.

  Returns:
    A resnet_v2 bottleneck block: a `Block` whose `args` holds one keyword
    dict ('depth', 'depth_bottleneck', 'stride') per unit.
  """
  # Only the last unit carries the block stride.
  unit_strides = [1] * (num_units - 1) + [stride]
  # Build a fresh dict per unit: multiplying a one-element list would make
  # every stride-1 unit share the same mutable dict object.
  return Block(scope, bottleneck, [{
      'depth': base_depth * 4,
      'depth_bottleneck': base_depth,
      'stride': unit_stride,
  } for unit_stride in unit_strides])


def resnet_v2_50(inputs,
                 num_classes=None,
                 is_training=True,
                 global_pool=True,
                 output_stride=None,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='resnet_v2_50'):
  """ResNet-50 model of [1]; arguments and returns are those of resnet_v2()."""
  # (scope, base_depth, num_units, stride) for each of the four blocks.
  block_specs = (
      ('block1', 64, 3, 2),
      ('block2', 128, 4, 2),
      ('block3', 256, 6, 2),
      ('block4', 512, 3, 1),
  )
  blocks = [
      resnet_v2_block(block_scope, base_depth=base_depth,
                      num_units=num_units, stride=block_stride)
      for block_scope, base_depth, num_units, block_stride in block_specs
  ]
  return resnet_v2(inputs, blocks, num_classes,
                   is_training=is_training,
                   global_pool=global_pool,
                   output_stride=output_stride,
                   include_root_block=True,
                   spatial_squeeze=spatial_squeeze,
                   reuse=reuse,
                   scope=scope)
resnet_v2_50.default_image_size = resnet_v2.default_image_size


def resnet_v2_101(inputs,
                  num_classes=None,
                  is_training=True,
                  global_pool=True,
                  output_stride=None,
                  spatial_squeeze=True,
                  reuse=None,
                  scope='resnet_v2_101'):
  """ResNet-101 model of [1]; arguments and returns are those of resnet_v2()."""
  # (scope, base_depth, num_units, stride) for each of the four blocks.
  block_specs = (
      ('block1', 64, 3, 2),
      ('block2', 128, 4, 2),
      ('block3', 256, 23, 2),
      ('block4', 512, 3, 1),
  )
  blocks = [
      resnet_v2_block(block_scope, base_depth=base_depth,
                      num_units=num_units, stride=block_stride)
      for block_scope, base_depth, num_units, block_stride in block_specs
  ]
  return resnet_v2(inputs, blocks, num_classes,
                   is_training=is_training,
                   global_pool=global_pool,
                   output_stride=output_stride,
                   include_root_block=True,
                   spatial_squeeze=spatial_squeeze,
                   reuse=reuse,
                   scope=scope)
resnet_v2_101.default_image_size = resnet_v2.default_image_size


def resnet_v2_152(inputs,
                  num_classes=None,
                  is_training=True,
                  global_pool=True,
                  output_stride=None,
                  spatial_squeeze=True,
                  reuse=None,
                  scope='resnet_v2_152'):
  """ResNet-152 model of [1]; arguments and returns are those of resnet_v2()."""
  # (scope, base_depth, num_units, stride) for each of the four blocks.
  block_specs = (
      ('block1', 64, 3, 2),
      ('block2', 128, 8, 2),
      ('block3', 256, 36, 2),
      ('block4', 512, 3, 1),
  )
  blocks = [
      resnet_v2_block(block_scope, base_depth=base_depth,
                      num_units=num_units, stride=block_stride)
      for block_scope, base_depth, num_units, block_stride in block_specs
  ]
  return resnet_v2(inputs, blocks, num_classes,
                   is_training=is_training,
                   global_pool=global_pool,
                   output_stride=output_stride,
                   include_root_block=True,
                   spatial_squeeze=spatial_squeeze,
                   reuse=reuse,
                   scope=scope)
resnet_v2_152.default_image_size = resnet_v2.default_image_size


def resnet_v2_200(inputs,
                  num_classes=None,
                  is_training=True,
                  global_pool=True,
                  output_stride=None,
                  spatial_squeeze=True,
                  reuse=None,
                  scope='resnet_v2_200'):
  """ResNet-200 model of [2]; arguments and returns are those of resnet_v2()."""
  # (scope, base_depth, num_units, stride) for each of the four blocks.
  block_specs = (
      ('block1', 64, 3, 2),
      ('block2', 128, 24, 2),
      ('block3', 256, 36, 2),
      ('block4', 512, 3, 1),
  )
  blocks = [
      resnet_v2_block(block_scope, base_depth=base_depth,
                      num_units=num_units, stride=block_stride)
      for block_scope, base_depth, num_units, block_stride in block_specs
  ]
  return resnet_v2(inputs, blocks, num_classes,
                   is_training=is_training,
                   global_pool=global_pool,
                   output_stride=output_stride,
                   include_root_block=True,
                   spatial_squeeze=spatial_squeeze,
                   reuse=reuse,
                   scope=scope)
resnet_v2_200.default_image_size = resnet_v2.default_image_size


from datetime import datetime
import math
import time
def time_tensorflow_run(session, target, info_string, num_batches=100):
    """Time repeated `session.run(target)` calls and print the statistics.

    The first 10 runs are treated as warm-up and excluded from the statistics.
    Every 10th step after warm-up prints its own duration, and a final line
    prints the mean and standard deviation over the timed runs.

    Args:
      session: Object exposing `run(target)`.
      target: Value passed unchanged to `session.run`.
      info_string: Label included in the final summary line.
      num_batches: Number of timed runs; must be at least 1. It used to be
        read from an undefined global, which raised NameError.

    Raises:
      ValueError: If `num_batches` is smaller than 1.
    """
    if num_batches < 1:
        raise ValueError('num_batches must be a positive integer.')

    num_steps_burn_in = 10
    total_duration = 0.0
    total_duration_squared = 0.0
    for i in range(num_batches + num_steps_burn_in):
        start_time = time.time()
        _ = session.run(target)
        duration = time.time() - start_time
        if i >= num_steps_burn_in:
            if not i % 10:
                print ('%s: step %d, duration = %.3f' %
                       (datetime.now(), i - num_steps_burn_in, duration))
            total_duration += duration
            total_duration_squared += duration * duration
    mn = total_duration / num_batches
    # Rounding can push E[x^2] - E[x]^2 slightly below zero (e.g. with a single
    # timed run), which would make math.sqrt raise; clamp it.
    vr = max(total_duration_squared / num_batches - mn * mn, 0.0)
    sd = math.sqrt(vr)
    print ('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
           (datetime.now(), info_string, num_batches, mn, sd))


def main():
    """Build ResNet-152 v2 on random input and time one forward pass."""
    batch_size = 32
    height, width = 224, 224
    num_batches = 1
    inputs = tf.random_uniform((batch_size, height, width, 3))
    net, _ = resnet_v2_152(inputs, 1000, is_training=False)

    init = tf.global_variables_initializer()
    # The context manager closes the session even if a run fails.
    with tf.Session() as sess:
        sess.run(init)
        time_tensorflow_run(sess, net, "Forward", num_batches=num_batches)

main.py
# -*- coding: utf-8 -*-
"""
Created on 2017 10.17
@author: liupeng
"""

import numpy as np  
import tensorflow as tf
slim = tf.contrib.slim
import numpy as np
import argparse
import os
from PIL import Image
from datetime import datetime
import math
import time
from resnet_v2 import *

# Batch size; in this script it is only referenced by commented-out code.
batch_size = 32
# Spatial size the input image is resized to before being fed to the network.
height, width = 224, 224
# Input placeholder for 3-channel images; the batch dimension is left open.
X = tf.placeholder(tf.float32, [None, height, width, 3])  
#Y = tf.placeholder(tf.float32, [None, 1000])  
#keep_prob = tf.placeholder(tf.float32) # dropout
#keep_prob_fc = tf.placeholder(tf.float32) # dropout

# Banner printed once when the module is executed.
print ("-----------------------------main.py start--------------------------")

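# Prediction with the pretrained model (the original comment, garbled by a wrong text encoding, read "training")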
def main():
    """Classify ``m.jpg`` with the pretrained ResNet-v2-101 checkpoint.

    Builds the 1001-class network on the module-level placeholder ``X``,
    restores 'model/101/resnet_v2_101.ckpt', feeds the resized image and
    prints the image array, the logits shape, the arg-max class index and
    the duration of the forward pass.

    NOTE: the image is only resized; no model-specific preprocessing is
    applied before it is fed to the network.
    """
    # model
    arg_scope = resnet_arg_scope()
    with slim.arg_scope(arg_scope):
        net, _ = resnet_v2_101(X, 1001, is_training=False)

    # reload model
    saver = tf.train.Saver(tf.global_variables())
    checkpoint_path = 'model/101/resnet_v2_101.ckpt'

    # input
    im = tf.read_file("m.jpg")
    # Force 3 channels so a grayscale JPEG still matches the placeholder.
    im = tf.image.decode_jpeg(im, channels=3)
    # resize_images expects the size as (new_height, new_width).
    im = tf.image.resize_images(im, (height, width))
    im = tf.expand_dims(im, 0)
    inputs = tf.cast(im, tf.float32)

    # run; the context manager closes the session even if a step fails.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, checkpoint_path)

        images = sess.run(inputs)
        print (images)
        start_time = time.time()
        out_put = sess.run(net, feed_dict={X: images})
        duration = time.time() - start_time

    # out_put is already a numpy array, so take the arg-max with numpy rather
    # than adding new ops to the graph.
    max_idx_p = np.argmax(out_put.reshape(-1, 1001), axis=1)
    print (out_put.shape)
    print (max_idx_p)
    print ('run time:', duration)


if __name__ == "__main__":
    main()

當構建模型,定義自己的輸出時,要將resnet_v2_101(X, 1001, is_training=False) 改為 resnet_v2_101(X, is_training=True),然後自己寫輸出層。

參考此處程式碼:

 if num_classes is not None:
          net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                            normalizer_fn=None, scope='logits')


參考樣例:(注意saver1和saver2,每個saver只儲存自己領域內的權重,不能只使用saver1,會導致只加載或者儲存resnet_v2_101的權重,不會儲存自己加的模組的權重)

還有一點要注意:影象輸入到 model 前要先進行預處理。上面給的連結中有 preprocessing 檔案,裡邊有各個模型資料預處理的方式,具體使用方法可以參考連結中的 train_image_classifier.py 和 eval_image_classifier.py。

# -*- coding: utf-8 -*-
"""
Created on 2017 10.17
@author: liupeng
"""

import numpy as np  
import tensorflow as tf
slim = tf.contrib.slim
import numpy as np
import argparse
import os
from PIL import Image
from datetime import datetime
import math
import time
from resnet import *

# Batch size; in this script it is only referenced by commented-out code.
batch_size = 32
# Spatial size the input image is resized to before being fed to the network.
height, width = 224, 224
# Input placeholder for 3-channel images; the batch dimension is left open.
X = tf.placeholder(tf.float32, [None, height, width, 3])  
#Y = tf.placeholder(tf.float32, [None, 1000])  
#keep_prob = tf.placeholder(tf.float32) # dropout
#keep_prob_fc = tf.placeholder(tf.float32) # dropout

# Banner printed once when the module is executed.
print ("-----------------------------main.py start--------------------------")

# Training
def main():
    """Attach a 10-class head to pretrained ResNet-v2-101 and save the result.

    Builds the headless network on the placeholder ``X``, adds a 1x1 conv
    classifier ('logits2'), loads the pretrained backbone weights, runs one
    forward pass on ``m.jpg`` and saves all variables to
    'model/101/fine-tune-1000'.
    """
    # model
    arg_scope = resnet_arg_scope()
    with slim.arg_scope(arg_scope):
        net, _ = resnet_v2_101(X, is_training=True)

    # saver1 is created before the new head exists, so it only covers the
    # variables of the pretrained backbone.
    saver1 = tf.train.Saver(tf.global_variables())
    checkpoint_path = 'model/101/resnet_v2_101.ckpt'

    num_classes = 10
    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, scope='logits2')
    net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')

    # saver2 covers the backbone plus the new head.
    saver2 = tf.train.Saver(tf.global_variables())

    # input
    im = tf.read_file("m.jpg")
    # Force 3 channels so a grayscale JPEG still matches the placeholder.
    im = tf.image.decode_jpeg(im, channels=3)
    # resize_images expects the size as (new_height, new_width).
    im = tf.image.resize_images(im, (height, width))
    im = tf.expand_dims(im, 0)
    inputs = tf.cast(im, tf.float32)

    # run; the context manager closes the session even if a step fails.
    with tf.Session() as sess:
        # Initialize everything first and restore afterwards. Running the
        # global initializer after the restore would overwrite the pretrained
        # weights with fresh random values.
        sess.run(tf.global_variables_initializer())
        saver1.restore(sess, checkpoint_path)

        images = sess.run(inputs)
        print (images)
        start_time = time.time()
        out_put = sess.run(net, feed_dict={X: images})
        duration = time.time() - start_time
        saver2.save(sess, "model/101/fine-tune", global_step=1000, write_meta_graph=False)

    # out_put is already a numpy array, so take the arg-max with numpy rather
    # than adding new ops to the graph.
    max_idx_p = np.argmax(out_put.reshape(-1, num_classes), axis=1)
    print (out_put.shape)
    print (max_idx_p)
    print ('run time:', duration)

def test():
    """Run the fine-tuned 10-class model on ``m.jpg`` and print its prediction.

    Rebuilds the same graph as main() in inference mode (is_training=False),
    restores every variable from 'model/101/fine-tune-1000' and prints the
    image array, the logits shape, the arg-max class index and the duration
    of the forward pass.
    """
    # model
    arg_scope = resnet_arg_scope()
    with slim.arg_scope(arg_scope):
        net, _ = resnet_v2_101(X, is_training=False)

    num_classes = 10
    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, scope='logits2')
    net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')

    # This saver covers every global variable (backbone and head), so the
    # restore below assigns all of them. Loading the pretrained checkpoint or
    # running an initializer first would only be overwritten.
    saver2 = tf.train.Saver(tf.global_variables())

    # input
    im = tf.read_file("m.jpg")
    # Force 3 channels so a grayscale JPEG still matches the placeholder.
    im = tf.image.decode_jpeg(im, channels=3)
    # resize_images expects the size as (new_height, new_width).
    im = tf.image.resize_images(im, (height, width))
    im = tf.expand_dims(im, 0)
    inputs = tf.cast(im, tf.float32)

    # run; the context manager closes the session even if a step fails.
    with tf.Session() as sess:
        saver2.restore(sess, "model/101/fine-tune-1000")

        images = sess.run(inputs)
        print (images)
        start_time = time.time()
        out_put = sess.run(net, feed_dict={X: images})
        duration = time.time() - start_time

    # out_put is already a numpy array, so take the arg-max with numpy rather
    # than adding new ops to the graph.
    max_idx_p = np.argmax(out_put.reshape(-1, num_classes), axis=1)
    print (out_put.shape)
    print (max_idx_p)
    print ('run time:', duration)


# Run main() once first: it writes the 'model/101/fine-tune-1000' checkpoint
# that test() restores.
# main()
if __name__ == "__main__":
    test()

訓練的時候需要注意兩點:(1)輸入引數 is_training=True;(2)計算loss時,要新增以下程式碼(即新增update_ops到最後的train_op中),這樣才能計算μ和σ的滑動平均(測試時會用到)。

  update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
  with tf.control_dependencies(update_ops):
    train_op = optimizer.minimize(loss)

比較完善的finetune程式碼:

下面以 Inception_V4為例:

# -*- coding: utf-8 -*-
"""
Created on 2017 10.17
@author: liupeng
"""

import numpy as np
import tensorflow as tf
slim = tf.contrib.slim
import numpy as np
import argparse
import os
from PIL import Image
from datetime import datetime
import math
import time
from load_image import *
import cv2
from load_cifar10 import load_cifar10_data
from inception_v4 import *
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"


# Directory name used by the commented-out load_database() call below.
craterDir = "train"
#train_data, train_label = load_database(craterDir)
# Load the train/validation splits through the project helper.
# NOTE(review): the two 299 arguments presumably give the target image
# height and width - confirm against load_cifar10.py.
train_data, train_label, valid_data, valid_label = load_cifar10_data(299, 299)
print(len(train_data))
# Number of training samples.
image_number = len(train_data)
# Debug output: dumps the full training data and labels.
print (train_data)
print (train_label)
# batch_x, batch_y = get_next_batch(data, label, 0)
#craterDir = "validation"
#valid_data, valid_label = load_database(craterDir)

batch_size = 32
# Spatial size of the network input placeholder.
height, width = 299, 299
# Input placeholder for 3-channel images; the batch dimension is left open.
X = tf.placeholder(tf.float32, [None, height, width, 3])
#Y = tf.placeholder(tf.float32, [None, 4])
# Label placeholder with 10 columns per sample (matches num_classes = 10 in
# main(); presumably one-hot - confirm against the loader).
Y = tf.placeholder(tf.float32, [None, 10])
# Boolean placeholder passed to the model as is_training.
is_train = tf.placeholder(tf.bool, name='is_train')
keep_prob = tf.placeholder(tf.float32) # dropout
keep_prob_fc = tf.placeholder(tf.float32) # dropout

# Banner printed once when the module is executed.
print ("-----------------------------main.py start--------------------------")

# Training
def main():
    """Fine-tune a pretrained Inception-v4 with a new classification head.

    Steps:
      1. Build the Inception-v4 network on the module-level placeholder ``X``.
      2. Stack a new head under the ``Logits_out`` variable scope
         (average pool -> dropout -> 256-unit ReLU layer -> ``num_classes``
         logits).
      3. Initialise every variable, then restore all model variables whose
         name does not start with ``Logits_out`` from
         ``model/inception_v4.ckpt``.
      4. Run the training loop, printing the loss every step, printing
         loss/accuracy periodically, and saving checkpoints under the prefix
         ``model/50/fine-tune``.

    Relies on module-level names: ``X``, ``Y``, ``is_train``, ``batch_size``,
    ``image_number``, ``train_data``, ``train_label``, ``valid_data``,
    ``valid_label`` and ``get_next_batch``.

    Returns:
      None.
    """
    num_classes = 10
    arg_scope = inception_v4_arg_scope()
    with slim.arg_scope(arg_scope):
        net, end_points = inception_v4(X, is_training=is_train)

    # New classification head, kept in its own scope so that its variables
    # can be separated from the pretrained ones by name prefix below.
    with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='SAME'):
        with tf.variable_scope('Logits_out'):
            # 8 x 8 x 1536
            net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID',
                                  scope='AvgPool_1a_out')
            # 1 x 1 x 1536
            dropout_keep_prob = 0.8
            # slim.dropout defaults to is_training=True; tie it to is_train
            # so the evaluation runs below (is_train: False) are not
            # perturbed by dropout.
            net = slim.dropout(net, dropout_keep_prob, is_training=is_train,
                               scope='Dropout_1b_out')
            net = slim.flatten(net, scope='PreLogitsFlatten_out')
            # 1536
            net = slim.fully_connected(net, 256, activation_fn=tf.nn.relu, scope='Logits_out0')
            net = slim.fully_connected(net, num_classes, activation_fn=None, scope='Logits_out1')

    # The stock "InceptionV4/Logits,InceptionV4/AuxLogits" exclusion no longer
    # applies: the fully connected layers were replaced, so only the new head
    # has to be left out of the checkpoint restore.
    checkpoint_exclude_scopes = "Logits_out"
    exclusions = []
    if checkpoint_exclude_scopes:
        exclusions = [scope.strip() for scope in checkpoint_exclude_scopes.split(',')]
    print (exclusions)
    excluded_prefixes = tuple(exclusions)

    # Variables loaded from the pretrained checkpoint.
    variables_to_restore = []
    # Variables of the new head (absent from the pretrained checkpoint).
    variables_to_train = []
    for var in slim.get_model_variables():
        # str.startswith accepts a tuple of prefixes (False for an empty one).
        if var.op.name.startswith(excluded_prefixes):
            variables_to_train.append(var)
            print ("ok")
            print (var.op.name)
        else:
            variables_to_restore.append(var)

    # loss function
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = Y, logits = net))

    # NOTE(review): variables_to_train is collected but not passed to
    # minimize(), so every trainable variable is updated. Pass
    # var_list=variables_to_train if only the new head should be trained.
    # The UPDATE_OPS control dependency makes each training step also run the
    # ops registered in that collection.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimizer = tf.train.AdamOptimizer(learning_rate=0.00001).minimize(loss)

    predict = tf.reshape(net, [-1, num_classes])
    max_idx_p = tf.argmax(predict, 1)
    max_idx_l = tf.argmax(Y, 1)
    correct_pred = tf.equal(max_idx_p, max_idx_l)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    #------------------------------------------------------------------------------------#
    # The context manager closes the session even if a step below raises.
    with tf.Session() as sess:
        # Initialise everything first; the restore below then overwrites the
        # pretrained part and leaves the new head at its initial values.
        init = tf.global_variables_initializer()
        sess.run(init)

        # saver2 covers all global variables and writes the fine-tuning
        # checkpoints. To resume an earlier run, restore with it here, e.g.
        # saver2.restore(sess, "model/50/fine-tune-1120").
        saver2 = tf.train.Saver(tf.global_variables())
        model_path = 'model/50/fine-tune'

        saver_net = tf.train.Saver(variables_to_restore)
        checkpoint_path = 'model/inception_v4.ckpt'
        saver_net.restore(sess, checkpoint_path)

        steps_per_epoch = image_number // batch_size
        for _epoch in range(2000):
            for i in range(steps_per_epoch):
                images, labels = get_next_batch(train_data, train_label, i, batch_size=batch_size)
                los, _ = sess.run([loss, optimizer], feed_dict={X: images, Y: labels, is_train:True})
                print (los)

                if i%20==0:
                    # Same training batch, re-evaluated in inference mode.
                    loss_, acc_ = sess.run([loss, accuracy], feed_dict={X: images, Y: labels, is_train:False})
                    print (i, loss_, acc_)

                if i%80==0 and i!=0:
                    # NOTE(review): global_step is the within-epoch batch
                    # index, so checkpoint names repeat every epoch --
                    # confirm this is intended.
                    saver2.save(sess, model_path, global_step=i, write_meta_graph=False)

                if i%80==0:
                    # Periodic check on a validation batch.
                    images, labels = get_next_batch(valid_data, valid_label, i%150, batch_size=64)
                    ls, acc = sess.run([loss, accuracy], feed_dict={X: images, Y: labels, is_train:False})
                    print(i, ls, acc)

'''
    # input
    # input = X
    # inputs = tf.random_uniform((batch_size, height, width, 3))
    im = tf.read_file("m.jpg")
    im = tf.image.decode_jpeg(im)
    im = tf.image.resize_images(im, (width, height))
    im = tf.reshape(im, [-1,height,width,3])
    im = tf.cast(im, tf.float32)
    inputs = im

    # run
    images = sess.run(inputs)
    print (images)
    start_time = time.time()
    out_put = sess.run(net, feed_dict={X:images})
    duration = time.time() - start_time
    saver2.save(sess, "model/50/fine-tune", global_step=1000, write_meta_graph=False)
    print (out_put.shape)
    print (sess.run(max_idx_p, feed_dict={X:images}))
    print ('run time:', duration)
    sess.close()'''

def test():
    """Run a ResNet-v2-50 based classifier on ``lp.jpg`` and print the result.

    Builds ``resnet_v2_50`` on the module-level placeholder ``X``, restores
    ``model/50/resnet_v2_50.ckpt``, appends a 1x1 convolution (``logits2``)
    producing ``num_classes`` outputs, re-initialises all variables, restores
    ``model/50/fine-tune-80`` over all global variables, and finally prints
    the output shape, the arg-max class index and the inference time for the
    single image.

    NOTE(review): ``resnet_arg_scope`` / ``resnet_v2_50`` are not named in the
    imports visible in this script -- confirm that one of the star imports
    provides them. This function also builds a 4-class ResNet head while
    main() writes ``model/50/fine-tune-*`` for a 10-class Inception-v4 graph;
    verify the checkpoint matches before relying on this function.

    Raises:
      IOError: if ``lp.jpg`` cannot be read.
    """
    # model
    arg_scope = resnet_arg_scope()
    with slim.arg_scope(arg_scope):
        net, end_points = resnet_v2_50(X, is_training=False)

    # The context manager closes the session even if a step below raises.
    with tf.Session() as sess:
        # Reload the pretrained backbone. The Saver is created before the new
        # head exists, so it only covers the variables defined so far.
        saver1 = tf.train.Saver(tf.global_variables())
        checkpoint_path = 'model/50/resnet_v2_50.ckpt'
        saver1.restore(sess, checkpoint_path)

        num_classes = 4
        net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, scope='logits2')
        net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')

        # Initialise everything (this also resets the values restored above),
        # then load the complete fine-tuned model.
        init = tf.global_variables_initializer()
        sess.run(init)

        saver2 = tf.train.Saver(tf.global_variables())
        saver2.restore(sess, "model/50/fine-tune-80")

        # input
        im = cv2.imread("lp.jpg")
        if im is None:
            # cv2.imread reports an unreadable/missing file by returning
            # None; fail here with a clear message instead of inside resize.
            raise IOError('cannot read image "lp.jpg"')
        im = cv2.resize(im, (width, height))
        images = np.reshape(im, [-1,height,width,3])
        # NOTE(review): cv2.imread yields BGR channel order -- confirm this
        # matches the preprocessing the model was trained with.
        images = (images-127.5)
        print (images)

        # run
        start_time = time.time()
        out_put = sess.run(net, feed_dict={X:images})
        duration = time.time() - start_time

        # out_put is already a NumPy array; take the arg-max with NumPy
        # instead of adding new ops to the graph and running them.
        predict = np.reshape(out_put, [-1, num_classes])
        max_idx_p = np.argmax(predict, 1)
        print (out_put.shape)
        print (max_idx_p)
        print ('run time:', duration)



# Script entry point: runs main(); the test() call is left commented out.
main()
# test()