keras 深度學習之遷移學習和fine tune
阿新 • • 發佈:2019-01-31
一.遷移學習
遷移學習就是把別人訓練好的網路(比如卷積特徵提取部分)拿過來,配合上自己的分類器,這樣可以加快訓練速度並提高分類效果。
'''
Created on 2018年8月28日
'''
#遷移學習
import keras
from keras.models import Model
from keras.applications import ResNet50
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
# Transfer learning: run every image once through a pre-trained ResNet50
# feature extractor, cache the resulting features on disk, then train a
# classical classifier on those features.
train_data_dir = "data/train"
valid_data_dir = "data/valid"
img_width, img_height = 224, 224
batch_size = 32
# Expected number of images under each directory. The label arrays built
# further down rely on these counts being exact.
train_samples_nums = 14000
valid_samples_nums = 1400

# Only the feature-extraction part of ResNet50 is used (include_top=False).
model = ResNet50(weights="model/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5", include_top=False)

# NOTE(review): pixels are only rescaled to [0, 1] here. The pre-trained
# weights were presumably produced with
# keras.applications.resnet50.preprocess_input -- confirm whether that
# preprocessing should be used instead.
datagen = ImageDataGenerator(rescale=1. / 255)

# One generator per split. class_mode=None yields images without labels and
# shuffle=False keeps a fixed order so that labels can be re-attached later.
# target_size is (height, width).
train_generator = datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode=None,
    shuffle=False)
valid_generator = datagen.flow_from_directory(
    valid_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode=None,
    shuffle=False)

# Round the step count UP so the final, smaller batch is predicted too.
# Floor division would drop the remainder and leave fewer feature rows than
# labels.
train_steps = (train_samples_nums + batch_size - 1) // batch_size
valid_steps = (valid_samples_nums + batch_size - 1) // batch_size

# Extract the features with ResNet50 and cache them as .npy files. Passing
# the path (rather than an open file object) lets numpy open and close the
# file itself.
bottleneck_train_features = model.predict_generator(train_generator, train_steps)
np.save('train_features.npy', bottleneck_train_features)
bottleneck_valid_features = model.predict_generator(valid_generator, valid_steps)
np.save('valid_features.npy', bottleneck_valid_features)

# Reload the cached features (same file names as written above) and flatten
# each sample to a 1-D vector: the network emits one feature map per image,
# while scikit-learn estimators expect a 2-D (n_samples, n_features) array.
train_data = np.load('train_features.npy')
train_data = train_data.reshape(len(train_data), -1)
valid_data = np.load('valid_features.npy')
valid_data = valid_data.reshape(len(valid_data), -1)

# Labels: first half 0, second half 1.
# NOTE(review): this assumes exactly two equally sized classes that the
# unshuffled generator yields one after the other -- verify against the
# directory layout.
train_labels = np.array([0] * (train_samples_nums // 2) + [1] * (train_samples_nums // 2))
valid_labels = np.array([0] * (valid_samples_nums // 2) + [1] * (valid_samples_nums // 2))

# The features above can now be fed to any classifier
# (SVM, MLP, CNN, RNN, RF, XGBoost, LightGBM, ...). Example with an SVM:
from sklearn.svm import SVC
clf = SVC(kernel="linear", probability=True)
clf.fit(train_data, train_labels)
二.fine tune(微調)
fine tune 一般在遷移學習的時候使用,也就是將別人訓練好的網路拿過來,然後加上自己的分類層(一般是加上自己的卷積層+全連線層,或者直接加全連線層)。其中別人的網路可以設定為全部可訓練,也可以設定為部分可訓練;學習率最好調低一點,一般取0.0001-0.001。
'''
Created on 2018年8月28日
'''
#fine tune
#首先需要自己寫個ResNet,這些程式碼都可以在from keras.applications import ResNet50中拷貝
from keras.layers import Input, Add, Dense, Activation, Flatten, Conv2D, BatchNormalization,MaxPooling2D,Concatenate,Lambda,AveragePooling2D
from keras import backend as K
from keras import layers
from keras.models import Model
from keras.optimizers import SGD
#定義resnet的殘差模組
def identity_block(input_tensor, kernel_size, filters, stage, block):
    """Build a residual bottleneck block whose shortcut is the raw input.

    Main path: 1x1 conv -> BN -> ReLU -> ``kernel_size`` conv with
    ``padding='same'`` -> BN -> ReLU -> 1x1 conv -> BN. The result is added
    to ``input_tensor`` and passed through a final ReLU.

    Args:
        input_tensor: tensor fed into the block; it is also used, unchanged,
            as the shortcut branch of the addition.
        kernel_size: kernel size of the middle convolution.
        filters: sequence of three filter counts, one per convolution.
        stage: stage label, used only to build the layer names.
        block: block label string (e.g. ``'a'``), used only to build the
            layer names.

    Returns:
        The output tensor of the block.
    """
    n_first, n_middle, n_last = filters
    # Batch normalisation acts on the channel axis, whose position depends
    # on the backend image data format.
    channel_axis = 3 if K.image_data_format() == 'channels_last' else 1
    suffix = str(stage) + block + '_branch'
    conv_prefix = 'res' + suffix
    bn_prefix = 'bn' + suffix

    # 1x1 convolution.
    branch = Conv2D(n_first, (1, 1), name=conv_prefix + '2a')(input_tensor)
    branch = BatchNormalization(axis=channel_axis, name=bn_prefix + '2a')(branch)
    branch = Activation('relu')(branch)

    # kernel_size convolution; 'same' padding keeps the spatial size.
    branch = Conv2D(n_middle, kernel_size, padding='same',
                    name=conv_prefix + '2b')(branch)
    branch = BatchNormalization(axis=channel_axis, name=bn_prefix + '2b')(branch)
    branch = Activation('relu')(branch)

    # Final 1x1 convolution; no activation before the merge.
    branch = Conv2D(n_last, (1, 1), name=conv_prefix + '2c')(branch)
    branch = BatchNormalization(axis=channel_axis, name=bn_prefix + '2c')(branch)

    merged = layers.add([branch, input_tensor])
    return Activation('relu')(merged)
def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)):
    """Build a residual bottleneck block with a convolutional shortcut.

    Main path: strided 1x1 conv -> BN -> ReLU -> ``kernel_size`` conv with
    ``padding='same'`` -> BN -> ReLU -> 1x1 conv -> BN. The shortcut passes
    ``input_tensor`` through its own 1x1 convolution (same ``strides``, same
    number of filters as the last main-path convolution) and a BN layer.
    Both branches are added and passed through a final ReLU.

    Args:
        input_tensor: tensor fed into both the main path and the shortcut.
        kernel_size: kernel size of the middle convolution.
        filters: sequence of three filter counts, one per main-path
            convolution; the third is also used for the shortcut.
        stage: stage label, used only to build the layer names.
        block: block label string (e.g. ``'a'``), used only to build the
            layer names.
        strides: strides of the first main-path convolution and of the
            shortcut convolution.

    Returns:
        The output tensor of the block.
    """
    n_first, n_middle, n_last = filters
    # Batch normalisation acts on the channel axis, whose position depends
    # on the backend image data format.
    channel_axis = 3 if K.image_data_format() == 'channels_last' else 1
    suffix = str(stage) + block + '_branch'
    conv_prefix = 'res' + suffix
    bn_prefix = 'bn' + suffix

    # Main path, built before the shortcut.
    branch = Conv2D(n_first, (1, 1), strides=strides,
                    name=conv_prefix + '2a')(input_tensor)
    branch = BatchNormalization(axis=channel_axis, name=bn_prefix + '2a')(branch)
    branch = Activation('relu')(branch)

    branch = Conv2D(n_middle, kernel_size, padding='same',
                    name=conv_prefix + '2b')(branch)
    branch = BatchNormalization(axis=channel_axis, name=bn_prefix + '2b')(branch)
    branch = Activation('relu')(branch)

    branch = Conv2D(n_last, (1, 1), name=conv_prefix + '2c')(branch)
    branch = BatchNormalization(axis=channel_axis, name=bn_prefix + '2c')(branch)

    # Shortcut: 1x1 convolution with the same strides and filter count, so
    # its output can be added to the main path.
    projected = Conv2D(n_last, (1, 1), strides=strides,
                       name=conv_prefix + '1')(input_tensor)
    projected = BatchNormalization(axis=channel_axis, name=bn_prefix + '1')(projected)

    merged = layers.add([branch, projected])
    return Activation('relu')(merged)
def MyResNet():
    """Assemble the residual network followed by a 2-way softmax classifier.

    The model takes a ``(224, 224, 3)`` input and consists of a 7x7
    convolution stem with max pooling, four stages (2 to 5) of residual
    blocks, a ``Flatten`` layer and a ``Dense(2, softmax)`` layer named
    ``'fc2'``.

    Returns:
        A ``Model`` named ``'mymodel'`` mapping the image input to the
        softmax output.
    """
    channel_axis = 3 if K.image_data_format() == 'channels_last' else 1

    img_input = Input(shape=(224, 224, 3))

    # Stem: 7x7/2 convolution, batch norm, ReLU, 3x3/2 max pooling.
    net = Conv2D(64, (7, 7), strides=(2, 2), padding='same',
                 name='conv1')(img_input)
    net = BatchNormalization(axis=channel_axis, name='bn_conv1')(net)
    net = Activation('relu')(net)
    net = MaxPooling2D((3, 3), strides=(2, 2))(net)

    # (stage, filters, block labels, strides of the stage's first block).
    # Every stage opens with a conv_block and continues with identity
    # blocks; stage 2 uses strides (1, 1), the others (2, 2).
    stage_plan = (
        (2, [64, 64, 256], 'abc', (1, 1)),
        (3, [128, 128, 512], 'abcd', (2, 2)),
        (4, [256, 256, 1024], 'abcdef', (2, 2)),
        (5, [512, 512, 2048], 'abc', (2, 2)),
    )
    for stage, widths, labels, first_strides in stage_plan:
        net = conv_block(net, 3, widths, stage=stage, block=labels[0],
                         strides=first_strides)
        for label in labels[1:]:
            net = identity_block(net, 3, widths, stage=stage, block=label)

    # Custom fully connected classifier appended to the feature extractor.
    net = Flatten()(net)
    net = Dense(2, activation='softmax', name='fc2')(net)
    return Model(img_input, net, name='mymodel')
# Build the network and print its layer summary.
model = MyResNet()
model.summary()

# Load the pre-trained ResNet50 weights. by_name=True is passed so that
# weights are assigned according to layer names (see the Keras
# load_weights documentation).
resnet_weight = "model/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5"
model.load_weights(resnet_weight, by_name=True)

# Individual layers can be excluded from training: here the first ten
# layers of the model are frozen.
for layer in model.layers[0:10]:
    layer.trainable = False

# Compile the model. Fine-tuning generally uses a fairly low learning rate.
sgd = SGD(
    lr=0.0001,
    momentum=0.9,
    decay=0.00002,
    nesterov=True,
)
model.compile(
    loss='categorical_crossentropy',
    optimizer=sgd,
    metrics=['accuracy'],
)