ShuffleNet是由曠視(Megvii)發表的一個計算效率極高的CNN架構,它是專門為計算能力非常有限的移動裝置(例如,10-150 MFLOPs)而設計的。該結構利用逐點組卷積(pointwise group convolution)和通道混洗(channel shuffle)兩種新的運算方法,在保持精度的同時,大大降低了計算成本。ImageNet分類和MS COCO物件檢測實驗表明,ShuffleNet的效能優於其他結構,例如,在40 MFLOPs的計算預算下,ShuffleNet在ImageNet分類任務上的top-1 error比最近的MobileNet低7.8%(絕對值)。在基於ARM的移動裝置上,ShuffleNet比AlexNet實際加速了約13倍,同時保持了相當的準確性。

Paper:ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices





def _group_conv(x,filters,kernel,stride,groups):
 Group convolution
 # Arguments
  x: Tensor,input tensor of with `channels_last` or 'channels_first' data format
  filters: Integer,number of output channels
  kernel: An integer or tuple/list of 2 integers,specifying the
   width and height of the 2D convolution window.
  strides: An integer or tuple/list of 2 integers,specifying the strides of the convolution along the width and height.
   Can be a single integer to specify the same value for
   all spatial dimensions.
  groups: Integer,number of groups per channel
 # Returns
  Output tensor
 channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
 in_channels = K.int_shape(x)[channel_axis]
 # number of input channels per group
 nb_ig = in_channels // groups
 # number of output channels per group
 nb_og = filters // groups
 gc_list = []
 # Determine whether the number of filters is divisible by the number of groups
 assert filters % groups == 0
 for i in range(groups):
  if channel_axis == -1:
   x_group = Lambda(lambda z: z[:,:,i * nb_ig: (i + 1) * nb_ig])(x)
   x_group = Lambda(lambda z: z[:,i * nb_ig: (i + 1) * nb_ig,:])(x)
 return Concatenate(axis=channel_axis)(gc_list)



>>> d = np.array([0,1,2,3,4,5,6,7,8]) 
>>> x = np.reshape(d,(3,3)) 
>>> x = np.transpose(x,[1,0]) # 轉置
>>> x = np.reshape(x,(9,)) # 平鋪
'[0 1 2 3 4 5 6 7 8] --> [0 3 6 1 4 7 2 5 8]' 


def _channel_shuffle(x, groups):
    """Shuffle channels across groups (ShuffleNet channel shuffle).

    The channel axis of size `groups * n` is reshaped to `(groups, n)`,
    the two axes are swapped, and the result is flattened back, so that
    consecutive output channels come from different groups.

    # Arguments
        x: Tensor, 4D input tensor in `channels_last` or `channels_first`
            data format.
        groups: Integer, number of groups the channels are split into.

    # Returns
        Shuffled tensor with the same shape as `x`.
    """
    if K.image_data_format() == 'channels_last':
        height, width, in_channels = K.int_shape(x)[1:]
        channels_per_group = in_channels // groups
        # Split channels into (groups, channels_per_group) ...
        pre_shape = [-1, height, width, groups, channels_per_group]
        # ... swap the two channel sub-axes ...
        dim = (0, 1, 2, 4, 3)
        # ... and merge them back into a single channel axis.
        later_shape = [-1, height, width, in_channels]
    else:
        in_channels, height, width = K.int_shape(x)[1:]
        channels_per_group = in_channels // groups
        pre_shape = [-1, groups, channels_per_group, height, width]
        dim = (0, 2, 1, 3, 4)
        later_shape = [-1, in_channels, height, width]

    x = Lambda(lambda z: K.reshape(z, pre_shape))(x)
    x = Lambda(lambda z: K.permute_dimensions(z, dim))(x)
    x = Lambda(lambda z: K.reshape(z, later_shape))(x)

    return x

ShuffleNet Unit


Python 3.6

TensorFlow 1.13.1

Keras 2.2.4


支援channel first或channel last

# -*- coding: utf-8 -*-
"""
Created on Thu Apr 25 18:26:41 2019
@author: zjn
"""
import numpy as np
from keras.callbacks import LearningRateScheduler
from keras.models import Model
from keras.layers import Input,Conv2D,Dropout,Dense,GlobalAveragePooling2D,Concatenate,AveragePooling2D
from keras.layers import Activation,BatchNormalization,add,Reshape,ReLU,DepthwiseConv2D,MaxPooling2D,Lambda
from keras.utils.vis_utils import plot_model
from keras import backend as K
from keras.optimizers import SGD
def _group_conv(x,groups):
 Group convolution
 # Arguments
  x: Tensor,number of groups per channel
 # Returns
  Output tensor
 channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
 in_channels = K.int_shape(x)[channel_axis]
 # number of input channels per group
 nb_ig = in_channels // groups
 # number of output channels per group
 nb_og = filters // groups
 gc_list = []
 # Determine whether the number of filters is divisible by the number of groups
 assert filters % groups == 0
 for i in range(groups):
  if channel_axis == -1:
   x_group = Lambda(lambda z: z[:,use_bias=False)(x_group))
 return Concatenate(axis=channel_axis)(gc_list)
def _channel_shuffle(x, groups):
    """Shuffle channels across groups (ShuffleNet channel shuffle).

    The channel axis of size `groups * n` is reshaped to `(groups, n)`,
    the two axes are swapped, and the result is flattened back, so that
    consecutive output channels come from different groups.

    # Arguments
        x: Tensor, 4D input tensor in `channels_last` or `channels_first`
            data format.
        groups: Integer, number of groups the channels are split into.

    # Returns
        Shuffled tensor with the same shape as `x`.
    """
    if K.image_data_format() == 'channels_last':
        height, width, in_channels = K.int_shape(x)[1:]
        channels_per_group = in_channels // groups
        # Split channels into (groups, channels_per_group) ...
        pre_shape = [-1, height, width, groups, channels_per_group]
        # ... swap the two channel sub-axes ...
        dim = (0, 1, 2, 4, 3)
        # ... and merge them back into a single channel axis.
        later_shape = [-1, height, width, in_channels]
    else:
        in_channels, height, width = K.int_shape(x)[1:]
        channels_per_group = in_channels // groups
        pre_shape = [-1, groups, channels_per_group, height, width]
        dim = (0, 2, 1, 3, 4)
        later_shape = [-1, in_channels, height, width]

    x = Lambda(lambda z: K.reshape(z, pre_shape))(x)
    x = Lambda(lambda z: K.permute_dimensions(z, dim))(x)
    x = Lambda(lambda z: K.reshape(z, later_shape))(x)

    return x
def _shufflenet_unit(inputs, filters, kernel, stride, groups, stage,
                     bottleneck_ratio=0.25):
    """Build one ShuffleNet unit.

    The unit is: pointwise (group) conv -> BN -> ReLU -> channel shuffle ->
    depthwise conv -> BN -> pointwise group conv -> BN, merged with the
    shortcut path by concatenation (stride 2) or addition (stride 1).

    # Arguments
        inputs: Tensor, 4D input tensor in `channels_last` or
            `channels_first` data format.
        filters: Integer, number of output channels of the unit.
        kernel: An integer or tuple/list of 2 integers, specifying the
            width and height of the depthwise convolution window.
        stride: Integer, stride of the depthwise convolution. With
            stride 2 the shortcut is average-pooled and concatenated;
            otherwise the shortcut is added.
        groups: Integer, number of groups per channel.
        stage: Integer, stage number of ShuffleNet.
        bottleneck_ratio: Float, ratio of bottleneck channels to output
            channels.

    # Returns
        Output tensor.

    # Note
        For Stage 2, we (authors of shufflenet) do not apply group
        convolution on the first pointwise layer because the number of
        input channels is relatively small.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    in_channels = K.int_shape(inputs)[channel_axis]
    bottleneck_channels = int(filters * bottleneck_ratio)

    if stage == 2:
        # Plain (ungrouped) pointwise 1x1 convolution, see the note above.
        x = Conv2D(filters=bottleneck_channels, kernel_size=(1, 1), strides=1,
                   padding='same', use_bias=False)(inputs)
    else:
        x = _group_conv(inputs, bottleneck_channels, (1, 1), 1, groups)
    x = BatchNormalization(axis=channel_axis)(x)
    x = ReLU()(x)

    x = _channel_shuffle(x, groups)

    x = DepthwiseConv2D(kernel_size=kernel, strides=stride, depth_multiplier=1,
                        padding='same', use_bias=False)(x)
    x = BatchNormalization(axis=channel_axis)(x)

    if stride == 2:
        # The pooled shortcut contributes `in_channels` channels, so the main
        # branch only produces the remaining `filters - in_channels`.
        x = _group_conv(x, filters - in_channels, (1, 1), 1, groups)
        x = BatchNormalization(axis=channel_axis)(x)
        avg = AveragePooling2D(pool_size=(3, 3), strides=2, padding='same')(inputs)
        x = Concatenate(axis=channel_axis)([x, avg])
    else:
        # Residual addition: requires `filters` to equal the input channels.
        x = _group_conv(x, filters, (1, 1), 1, groups)
        x = BatchNormalization(axis=channel_axis)(x)
        x = add([x, inputs])

    return x
def _stage(x, filters, kernel, groups, repeat, stage):
    """Build one stage of ShuffleNet from stacked ShuffleNet units.

    The first unit uses stride 2; the remaining `repeat - 1` units use
    stride 1.

    # Arguments
        x: Tensor, 4D input tensor in `channels_last` or `channels_first`
            data format.
        filters: Integer, number of output channels of every unit.
        kernel: An integer or tuple/list of 2 integers, specifying the
            width and height of the depthwise convolution window.
        groups: Integer, number of groups per channel.
        repeat: Integer, total number of repetitions for a shuffle unit
            in this stage.
        stage: Integer, stage number of ShuffleNet.

    # Returns
        Output tensor.
    """
    # First unit of the stage uses stride 2.
    x = _shufflenet_unit(x, filters, kernel, 2, groups, stage)

    # Remaining units use stride 1.
    for _ in range(1, repeat):
        x = _shufflenet_unit(x, filters, kernel, 1, groups, stage)

    return x
def ShuffleNet(input_shape, classes):
    """Build the ShuffleNet classification model.

    The network is a 3x3 stride-2 convolution with 24 filters, a 3x3
    stride-2 max pooling, three stages of ShuffleNet units (384, 768 and
    1536 output channels with 8 groups, repeated 4, 8 and 4 times),
    global average pooling and a softmax classifier.

    # Arguments
        input_shape: Tuple/list of 3 integers, shape of the input tensor
            (without the batch dimension).
        classes: Integer, number of classes to predict.

    # Returns
        A keras model.
    """
    inputs = Input(shape=input_shape)

    x = Conv2D(24, (3, 3), strides=2, padding='same', use_bias=True,
               activation='relu')(inputs)
    x = MaxPooling2D(pool_size=(3, 3), strides=2, padding='same')(x)

    x = _stage(x, filters=384, kernel=(3, 3), groups=8, repeat=4, stage=2)
    x = _stage(x, filters=768, kernel=(3, 3), groups=8, repeat=8, stage=3)
    x = _stage(x, filters=1536, kernel=(3, 3), groups=8, repeat=4, stage=4)

    x = GlobalAveragePooling2D()(x)

    x = Dense(classes)(x)
    predicts = Activation('softmax')(x)

    model = Model(inputs, predicts)

    return model
if __name__ == '__main__':
    # Input shape is (height, width, channels) for the default
    # `channels_last` data format; the second argument is the class count.
    model = ShuffleNet((224, 224, 3), 1000)

