kaggle之人臉特徵識別
阿新 • • 發佈:2019-01-03
facial-keypoints-detection, 這是一個人臉識別任務,任務是識別人臉圖片中的眼睛、鼻子、嘴的位置。訓練集包含以下15個位置的座標,行末是圖片的畫素值,共96*96個畫素值。測試集只包含圖片的畫素值。
left_eye_center, right_eye_center, left_eye_inner_corner, left_eye_outer_corner, right_eye_inner_corner, right_eye_outer_corner, left_eyebrow_inner_end, left_eyebrow_outer_end, right_eyebrow_inner_end, right_eyebrow_outer_end, nose_tip, mouth_left_corner, mouth_right_corner, mouth_center_top_lip, mouth_center_bottom_lip
import cPickle as pickle
from datetime import datetime
import os
import sys
import numpy as np
import pandas as pd
from lasagne import layers
from nolearn.lasagne import BatchIterator
from nolearn.lasagne import NeuralNet
from pandas.io.parsers import read_csv
from sklearn.utils import shuffle
import theano
/Library/Python/2.7/site-packages/theano/tensor/signal/downsample.py:6: UserWarning: downsample module has been moved to the theano.tensor.signal.pool module.
"downsample module has been moved to the theano.tensor.signal.pool module.")
資料載入與預覽
train_file = 'training.csv'
test_file = 'test.csv'
def load(test=False, cols=None):
"""
載入資料,通過引數控制載入訓練集還是測試集,並篩選特徵列
"""
fname = test_file if test else train_file
df = pd.read_csv(os.path.expanduser(fname))
# 將影象資料轉換為陣列
df['Image'] = df['Image'].apply(lambda x: np.fromstring(x, sep=' '))
# 篩選指定的資料列
if cols:
df = df[list(cols) + ['Image']]
print(df.count()) # 每列的簡單統計
df = df.dropna() # 刪除空資料
# 歸一化到0到1
X = np.vstack(df['Image'].values) / 255.
X = X.astype(np.float32)
# 針對訓練集目標標籤進行歸一化
if not test:
y = df[df.columns[:-1]].values
y = (y - 48) / 48
X, y = shuffle(X, y, random_state=42)
y = y.astype(np.float32)
else:
y = None
return X, y
# 將單行畫素資料轉換為三維矩陣
def load2d(test=False, cols=None):
X, y = load(test=test, cols=cols)
X = X.reshape(-1, 1, 96, 96)
return X, y
資料處理
一種方式是我們訓練一個分類器,用來分類所有的目標特徵。另一種是針對眼鏡、鼻子、嘴分別設定不同的分類器,每個分類器只預測單個目標。通過觀察資料我們發現,訓練集中有許多缺失資料,如果訓練一個分類器,刪掉缺失資料會讓我們的樣本集變小,不能很好地利用起資料,因此,我們選擇第二種方式,每個目標訓練一個分類器,這樣更好的利用樣本資料。
from collections import OrderedDict
from sklearn.base import clone
SPECIALIST_SETTINGS = [
dict(
columns=(
'left_eye_center_x', 'left_eye_center_y',
'right_eye_center_x', 'right_eye_center_y',
),
flip_indices=((0, 2), (1, 3)),
),
dict(
columns=(
'nose_tip_x', 'nose_tip_y',
),
flip_indices=(),
),
dict(
columns=(
'mouth_left_corner_x', 'mouth_left_corner_y',
'mouth_right_corner_x', 'mouth_right_corner_y',
'mouth_center_top_lip_x', 'mouth_center_top_lip_y',
),
flip_indices=((0, 2), (1, 3)),
),
dict(
columns=(
'mouth_center_bottom_lip_x',
'mouth_center_bottom_lip_y',
),
flip_indices=(),
),
dict(
columns=(
'left_eye_inner_corner_x', 'left_eye_inner_corner_y',
'right_eye_inner_corner_x', 'right_eye_inner_corner_y',
'left_eye_outer_corner_x', 'left_eye_outer_corner_y',
'right_eye_outer_corner_x', 'right_eye_outer_corner_y',
),
flip_indices=((0, 2), (1, 3), (4, 6), (5, 7)),
),
dict(
columns=(
'left_eyebrow_inner_end_x', 'left_eyebrow_inner_end_y',
'right_eyebrow_inner_end_x', 'right_eyebrow_inner_end_y',
'left_eyebrow_outer_end_x', 'left_eyebrow_outer_end_y',
'right_eyebrow_outer_end_x', 'right_eyebrow_outer_end_y',
),
flip_indices=((0, 2), (1, 3), (4, 6), (5, 7)),
),
]
class FlipBatchIterator(BatchIterator):
flip_indices = [
(0, 2), (1, 3),
(4, 8), (5, 9), (6, 10), (7, 11),
(12, 16), (13, 17), (14, 18), (15, 19),
(22, 24), (23, 25),
]
def transform(self, Xb, yb):
Xb, yb = super(FlipBatchIterator, self).transform(Xb, yb)
# Flip half of the images in this batch at random:
bs = Xb.shape[0]
indices = np.random.choice(bs, bs / 2, replace=False)
Xb[indices] = Xb[indices, :, :, ::-1]
if yb is not None:
# Horizontal flip of all x coordinates:
yb[indices, ::2] = yb[indices, ::2] * -1
# Swap places, e.g. left_eye_center_x -> right_eye_center_x
for a, b in self.flip_indices:
yb[indices, a], yb[indices, b] = (
yb[indices, b], yb[indices, a])
return Xb, yb
class EarlyStopping(object):
def __init__(self, patience=100):
self.patience = patience
self.best_valid = np.inf
self.best_valid_epoch = 0
self.best_weights = None
def __call__(self, nn, train_history):
current_valid = train_history[-1]['valid_loss']
current_epoch = train_history[-1]['epoch']
if current_valid < self.best_valid:
self.best_valid = current_valid
self.best_valid_epoch = current_epoch
self.best_weights = nn.get_all_params_values()
elif self.best_valid_epoch + self.patience < current_epoch:
print("Early stopping.")
print("Best valid loss was {:.6f} at epoch {}.".format(
self.best_valid, self.best_valid_epoch))
nn.load_params_from(self.best_weights)
raise StopIteration()
class AdjustVariable(object):
def __init__(self, name, start=0.03, stop=0.001):
self.name = name
self.start, self.stop = start, stop
self.ls = None
def __call__(self, nn, train_history):
if self.ls is None:
self.ls = np.linspace(self.start, self.stop, nn.max_epochs)
epoch = train_history[-1]['epoch']
new_value = np.cast['float32'](self.ls[epoch - 1])
getattr(nn, self.name).set_value(new_value)
def float32(k):
return np.cast['float32'](k)
net = NeuralNet(
layers=[
('input', layers.InputLayer),
('conv1', layers.Conv2DLayer),
('pool1', layers.MaxPool2DLayer),
('dropout1', layers.DropoutLayer),
('conv2', layers.Conv2DLayer),
('pool2', layers.MaxPool2DLayer),
('dropout2', layers.DropoutLayer),
('conv3', layers.Conv2DLayer),
('pool3', layers.MaxPool2DLayer),
('dropout3', layers.DropoutLayer),
('hidden4', layers.DenseLayer),
('dropout4', layers.DropoutLayer),
('hidden5', layers.DenseLayer),
('output', layers.DenseLayer),
],
input_shape=(None, 1, 96, 96),
conv1_num_filters=32, conv1_filter_size=(3, 3), pool1_pool_size=(2, 2),
dropout1_p=0.1,
conv2_num_filters=64, conv2_filter_size=(2, 2), pool2_pool_size=(2, 2),
dropout2_p=0.2,
conv3_num_filters=128, conv3_filter_size=(2, 2), pool3_pool_size=(2, 2),
dropout3_p=0.3,
hidden4_num_units=300,
dropout4_p=0.5,
hidden5_num_units=300,
output_num_units=30, output_nonlinearity=None,
update_learning_rate=theano.shared(float32(0.03)),
update_momentum=theano.shared(float32(0.9)),
regression=True,
batch_iterator_train = BatchIterator(batch_size = 100),
batch_iterator_test = BatchIterator(batch_size = 100),
# batch_iterator_train=FlipBatchIterator(batch_size=128),
# on_epoch_finished=[
# AdjustVariable('update_learning_rate', start=0.03, stop=0.0001),
# AdjustVariable('update_momentum', start=0.9, stop=0.999),
# EarlyStopping(patience=200),
# ],
max_epochs=10,
verbose=1,
)
def fit_specialists(fname_pretrain=None):
if fname_pretrain:
with open(fname_pretrain, 'rb') as f:
net_pretrain = pickle.load(f)
else:
net_pretrain = None
specialists = OrderedDict()
for setting in SPECIALIST_SETTINGS:
cols = setting['columns']
X, y = load2d(cols=cols)
model = clone(net)
model.output_num_units = y.shape[1]
model.batch_iterator_train.flip_indices = setting['flip_indices']
model.max_epochs = int(4e6 / y.shape[0])
if 'kwargs' in setting:
# an option 'kwargs' in the settings list may be used to
# set any other parameter of the net:
vars(model).update(setting['kwargs'])
if net_pretrain is not None:
# if a pretrain model was given, use it to initialize the
# weights of our new specialist model:
model.load_params_from(net_pretrain)
print("Training model for columns {} for {} epochs".format(
cols, model.max_epochs))
model.fit(X, y)
specialists[cols] = model
with open('net-specialists.pickle', 'wb') as f:
# this time we're persisting a dictionary with all models:
pickle.dump(specialists, f, -1)
def predict(fname_specialists='net-specialists.pickle'):
with open(fname_specialists, 'rb') as f:
specialists = pickle.load(f)
X = load2d(test=True)[0]
y_pred = np.empty((X.shape[0], 0))
for model in specialists.values():
y_pred1 = model.predict(X)
y_pred = np.hstack([y_pred, y_pred1])
columns = ()
for cols in specialists.keys():
columns += cols
y_pred2 = y_pred * 48 + 48
y_pred2 = y_pred2.clip(0, 96)
df = DataFrame(y_pred2, columns=columns)
lookup_table = read_csv(os.path.expanduser(FLOOKUP))
values = []
for index, row in lookup_table.iterrows():
values.append((
row['RowId'],
df.ix[row.ImageId - 1][row.FeatureName],
))
now_str = datetime.now().isoformat().replace(':', '-')
submission = DataFrame(values, columns=('RowId', 'Location'))
filename = 'submission-{}.csv'.format(now_str)
submission.to_csv(filename, index=False)
print("Wrote {}".format(filename))
if __name__ == '__main__':
fit_specialists()
predict()
left_eye_center_x 7039
left_eye_center_y 7039
right_eye_center_x 7036
right_eye_center_y 7036
Image 7049
dtype: int64
Training model for columns ('left_eye_center_x', 'left_eye_center_y', 'right_eye_center_x', 'right_eye_center_y') for 568 epochs
/Library/Python/2.7/site-packages/lasagne/layers/conv.py:489: UserWarning: The `image_shape` keyword argument to `tensor.nnet.conv2d` is deprecated, it has been renamed to `input_shape`.
border_mode=border_mode)
# Neural Network with 4779676 learnable parameters
## Layer information
# name size
--- -------- ---------
0 input 1x96x96
1 conv1 32x94x94
2 pool1 32x47x47
3 dropout1 32x47x47
4 conv2 64x46x46
5 pool2 64x23x23
6 dropout2 64x23x23
7 conv3 128x22x22
8 pool3 128x11x11
9 dropout3 128x11x11
10 hidden4 300
11 dropout4 300
12 hidden5 300
13 output 4
# Neural Network with 4779676 learnable parameters
## Layer information
# name size
--- -------- ---------
0 input 1x96x96
1 conv1 32x94x94
2 pool1 32x47x47
3 dropout1 32x47x47
4 conv2 64x46x46
5 pool2 64x23x23
6 dropout2 64x23x23
7 conv3 128x22x22
8 pool3 128x11x11
9 dropout3 128x11x11
10 hidden4 300
11 dropout4 300
12 hidden5 300
13 output 4
epoch trn loss val loss trn/val dur
------- ---------- ---------- --------- -------
1 [36m0.01113[0m [32m0.00475[0m 2.34387 181.86s
epoch trn loss val loss trn/val dur
------- ---------- ---------- --------- -------
1 [36m0.01113[0m [32m0.00475[0m 2.34387 181.86s
Warning
單機執行實在是太慢了,這裡可以使用Amazon AWS的GPU例項來執行程式,建立過程如下參見:deep-learning-tutorial