深度學習AI美顏系列---性別識別
阿新 • • 發佈:2018-12-22
性別識別作為該系列的第一篇博文,是因為一張人像照片放在我們面前,我們首先需要判斷這個人像照片是男是女,然後才能根據男女分開進行相應的美顏!
本文使用Tensorflow來實現性別識別。
演算法
性別識別是一個簡單的分類,男和女兩類,我們使用簡單的CNN來實現,CNN的網路結構如下:
Fig.1 性別識別CNN網路結構示意圖
輸入圖片為大小為92X112的單通道灰度影象,如Fig.2所示,類別標籤(男標籤[1,0],女標籤[0,1]),所有引數均在網路結構圖中標註。
Fig.2輸入樣例圖
工程程式碼
資料集使用的是網路已有的資料集,下載連結在文末:
程式碼分為GenderUtils.py/GenderTrain.py/GenderTest.py三部分
GenderUtils.py中定義了相關的函式,如下:
# AGE
import matplotlib.image as img
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.python.framework import ops
import math
import os
import csv


def create_placeholders(n_H0, n_W0, n_C0, n_y):
    """
    Creates the placeholders for the tensorflow session.

    Arguments:
    n_H0 -- scalar, height of an input image
    n_W0 -- scalar, width of an input image
    n_C0 -- scalar, number of channels of the input
    n_y -- scalar, number of classes

    Returns:
    X -- placeholder for the data input, of shape [None, n_H0, n_W0, n_C0] and dtype "float"
    Y -- placeholder for the input labels, of shape [None, n_y] and dtype "float"
    """
    X = tf.placeholder(name='X', shape=(None, n_H0, n_W0, n_C0), dtype=tf.float32)
    Y = tf.placeholder(name='Y', shape=(None, n_y), dtype=tf.float32)
    return X, Y


def random_mini_batches(X, Y, mini_batch_size=64, seed=0):
    """
    Creates a list of random minibatches from (X, Y).

    Arguments:
    X -- input data, of shape (m, Hi, Wi, Ci)
    Y -- one-hot label matrix, of shape (m, n_y)
    mini_batch_size -- size of the mini-batches, integer
    seed -- numpy RNG seed, so the shuffle is reproducible per epoch

    Returns:
    mini_batches -- list of synchronous (mini_batch_X, mini_batch_Y) tuples
    """
    m = X.shape[0]  # number of training examples
    mini_batches = []
    np.random.seed(seed)

    # Step 1: Shuffle (X, Y) with the same permutation so rows stay aligned.
    permutation = list(np.random.permutation(m))
    shuffled_X = X[permutation, :, :, :]
    shuffled_Y = Y[permutation, :]

    # Step 2: Partition (shuffled_X, shuffled_Y). Minus the end case.
    num_complete_minibatches = int(math.floor(m / mini_batch_size))
    for k in range(0, int(num_complete_minibatches)):
        mini_batch_X = shuffled_X[k * mini_batch_size: k * mini_batch_size + mini_batch_size, :, :, :]
        mini_batch_Y = shuffled_Y[k * mini_batch_size: k * mini_batch_size + mini_batch_size, :]
        mini_batches.append((mini_batch_X, mini_batch_Y))

    # Handling the end case (last mini-batch < mini_batch_size).
    if m % mini_batch_size != 0:
        mini_batch_X = shuffled_X[num_complete_minibatches * mini_batch_size: m, :, :, :]
        mini_batch_Y = shuffled_Y[num_complete_minibatches * mini_batch_size: m, :]
        mini_batches.append((mini_batch_X, mini_batch_Y))
    return mini_batches


def row_csv2dict(csv_file):
    """Read a two-column CSV file and return it as a {column0: column1} dict."""
    dict_club = {}
    with open(csv_file) as f:
        reader = csv.reader(f, delimiter=',')
        for row in reader:
            dict_club[row[0]] = row[1]
    return dict_club


def input_data():
    """
    Load train/test grayscale face images (112x92) with one-hot gender labels.

    Label encoding: CSV value 0 -> [1, 0], otherwise -> [0, 1].

    Returns:
    image_train -- ndarray (train_num, 112, 92)
    label_train -- ndarray (train_num, 2)
    image_test -- ndarray (test_num, 112, 92)
    label_test -- ndarray (test_num, 2)
    """
    path = "data/train/"
    # Count every file under data/train to size the arrays up front.
    train_num = sum([len(x) for _, _, x in os.walk(os.path.dirname(path))])
    image_train = np.zeros((train_num, 112, 92))
    label_train = np.ones((train_num, 2))
    train_label_dict = row_csv2dict("data/train.csv")
    count = 0
    # NOTE(review): row alignment relies on the dict's iteration order;
    # presumably the CSV keys are the image file names — verify against the dataset.
    for key in train_label_dict:
        if int(train_label_dict[key]) == 0:
            label_train[count, 0] = 1
            label_train[count, 1] = 0
        else:
            label_train[count, 1] = 1
            label_train[count, 0] = 0
        filename = path + str(key)
        image_train[count] = img.imread(filename)
        count = count + 1

    path = "data/test/"
    test_num = sum([len(x) for _, _, x in os.walk(os.path.dirname(path))])
    image_test = np.zeros((test_num, 112, 92))
    label_test = np.ones((test_num, 2))
    test_label_dict = row_csv2dict("data/test.csv")
    count = 0
    for key in test_label_dict:
        if int(test_label_dict[key]) == 0:
            label_test[count, 0] = 1
            label_test[count, 1] = 0
        else:
            label_test[count, 1] = 1
            label_test[count, 0] = 0
        filename = path + str(key)
        image_test[count] = img.imread(filename)
        count = count + 1
    return image_train, label_train, image_test, label_test


def weight_variable(shape, name):
    """Return a weight Variable drawn from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1), name=name)


def bias_variable(shape, name):
    """Return a bias Variable initialized to the constant 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape), name=name)


def conv2d(x, w, padding="SAME"):
    """2-D convolution with stride 1 and SAME (default) or VALID padding."""
    if padding == "SAME":
        return tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding="SAME")
    else:
        return tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding="VALID")


def max_pool(x, kSize, Strides):
    """Max-pool with a kSize x kSize window, stride Strides, SAME padding."""
    return tf.nn.max_pool(x, ksize=[1, kSize, kSize, 1], strides=[1, Strides, Strides, 1], padding="SAME")


def compute_cost(Z3, Y):
    """
    Computes the cost.

    Arguments:
    Z3 -- output of forward propagation (output of the last LINEAR unit)
    Y -- "true" labels vector placeholder, same shape as Z3

    Returns:
    cost -- Tensor of the softmax cross-entropy cost, averaged over the batch
    """
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=Z3, labels=Y))
    return cost


def initialize_parameters():
    """
    Create all trainable Variables of the gender-recognition CNN.

    Three 5x5 conv kernels (1->32->64->128 channels), then fully-connected
    layers 14*12*128 -> 500 -> 500 -> 2 (two gender classes).

    Returns:
    parameters -- dict mapping "W1".."W6" / "b1".."b6" to tf Variables
    """
    tf.set_random_seed(1)
    W1 = tf.cast(weight_variable([5, 5, 1, 32], "W1"), dtype=tf.float32)
    b1 = tf.cast(bias_variable([32], "b1"), dtype=tf.float32)
    W2 = tf.cast(weight_variable([5, 5, 32, 64], "W2"), dtype=tf.float32)
    b2 = tf.cast(bias_variable([64], "b2"), dtype=tf.float32)
    W3 = tf.cast(weight_variable([5, 5, 64, 128], "W3"), dtype=tf.float32)
    b3 = tf.cast(bias_variable([128], "b3"), dtype=tf.float32)
    W4 = tf.cast(weight_variable([14 * 12 * 128, 500], "W4"), dtype=tf.float32)
    b4 = tf.cast(bias_variable([500], "b4"), dtype=tf.float32)
    W5 = tf.cast(weight_variable([500, 500], "W5"), dtype=tf.float32)
    b5 = tf.cast(bias_variable([500], "b5"), dtype=tf.float32)
    W6 = tf.cast(weight_variable([500, 2], "W6"), dtype=tf.float32)
    b6 = tf.cast(bias_variable([2], "b6"), dtype=tf.float32)
    parameters = {"W1": W1, "b1": b1,
                  "W2": W2, "b2": b2,
                  "W3": W3, "b3": b3,
                  "W4": W4, "b4": b4,
                  "W5": W5, "b5": b5,
                  "W6": W6, "b6": b6}
    return parameters


def cnn_net(x, parameters, keep_prob=1.0):
    """
    Forward pass of the CNN: three conv+relu+maxpool stages, then three FC layers.

    Arguments:
    x -- input image batch, shape (batch, 112, 92, 1)
    parameters -- dict from initialize_parameters()
    keep_prob -- dropout keep probability for the second FC layer (1.0 = no dropout)

    Returns:
    y_conv -- raw logits of shape (batch, 2); softmax is applied in the loss
    """
    # first convolution layer
    w_conv1 = parameters["W1"]
    b_conv1 = parameters["b1"]
    h_conv1 = tf.nn.relu(conv2d(x, w_conv1) + b_conv1)  # output size 112x92x32
    h_pool1 = max_pool(h_conv1, 2, 2)                   # output size 56x46x32
    # second convolution layer
    w_conv2 = parameters["W2"]
    b_conv2 = parameters["b2"]
    h_conv2 = tf.nn.relu(conv2d(h_pool1, w_conv2) + b_conv2)  # output size 56x46x64
    h_pool2 = max_pool(h_conv2, 2, 2)                         # output size 28x23x64
    # third convolution layer
    w_conv3 = parameters["W3"]
    b_conv3 = parameters["b3"]
    h_conv3 = tf.nn.relu(conv2d(h_pool2, w_conv3) + b_conv3)  # output size 28x23x128
    h_pool3 = max_pool(h_conv3, 2, 2)                         # output size 14x12x128
    # fully-connected layers
    w_fc1 = parameters["W4"]
    b_fc1 = parameters["b4"]
    h_fc11 = tf.reshape(h_pool3, [-1, 14 * 12 * 128])  # flatten for matmul
    h_fc1 = tf.nn.relu(tf.matmul(h_fc11, w_fc1) + b_fc1)
    w_fc2 = parameters["W5"]
    b_fc2 = parameters["b5"]
    h_fc2 = tf.nn.relu(tf.matmul(h_fc1, w_fc2) + b_fc2)
    h_fc2_drop = tf.nn.dropout(h_fc2, keep_prob)
    w_fc3 = parameters["W6"]
    b_fc3 = parameters["b6"]
    # Logits only: softmax is folded into softmax_cross_entropy_with_logits.
    y_conv = tf.matmul(h_fc2_drop, w_fc3) + b_fc3
    return y_conv


def save_model(saver, sess, save_path):
    """Save the session's variables via `saver` and print the checkpoint path."""
    path = saver.save(sess, save_path)
    # BUGFIX: was a Python 2 print statement (SyntaxError under Python 3).
    print('model save in :{0}'.format(path))
GenderTrain.py如下:
# AGE
import matplotlib.image as img
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.python.framework import ops
import math
import os
import csv
from GenderUtils import input_data,create_placeholders,random_mini_batches,row_csv2dict,weight_variable,bias_variable,conv2d,max_pool,compute_cost,initialize_parameters,cnn_net,save_model

np.random.seed(1)
tf.reset_default_graph()


def model(X_train, Y_train, X_test, Y_test,learning_rate = 0.001, num_epochs = 100, minibatch_size = 64, print_cost = True):
    """
    Implements a three-layer ConvNet in Tensorflow:
    CONV2D -> RELU -> MAXPOOL -> CONV2D -> RELU -> MAXPOOL -> FLATTEN -> FULLYCONNECTED

    Arguments:
    X_train -- training set, of shape (None, 112, 92, 1)
    Y_train -- training labels, of shape (None, n_y = 2)
    X_test -- test set, of shape (None, 112, 92, 1)
    Y_test -- test labels, of shape (None, n_y = 2)
    learning_rate -- learning rate of the optimization
    num_epochs -- number of epochs of the optimization loop
    minibatch_size -- size of a minibatch
    print_cost -- True to print the cost every 5 epochs

    Returns:
    train_accuracy -- real number, accuracy on the train set (X_train)
    test_accuracy -- real number, testing accuracy on the test set (X_test)
    parameters -- parameters learnt by the model. They can then be used to predict.
    """
    # ops.reset_default_graph()  # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1)  # to keep results consistent (tensorflow seed)
    seed = 3  # to keep results consistent (numpy seed)
    (m, n_H0, n_W0,n_C0) = X_train.shape
    n_y = Y_train.shape[1]
    costs = []  # per-epoch training costs, collected for (commented-out) plotting
    SAVE_PATH = "model/mymodel"  # checkpoint prefix used by save_model below
    print("X_train shape:",str(X_train.shape))
    # Create Placeholders of the correct shape
    X, Y = create_placeholders(n_H0, n_W0, n_C0, n_y)
    print("Y shape:", str(Y))
    # Initialize parameters
    parameters = initialize_parameters()
    # cnn forward pass -> logits
    Z3 = cnn_net(X, parameters)
    # Cost function
    cost = compute_cost(Z3, Y)
    # Backpropagation: Define the tensorflow optimizer.
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
    # Initialize all the variables globally
    init = tf.global_variables_initializer()
    # training process
    saver = tf.train.Saver(max_to_keep=3)
    with tf.Session() as sess:
        # Run the initialization
        sess.run(init)
        # Do the training loop
        for epoch in range(num_epochs):
            minibatch_cost = 0.
            # NOTE(review): this counts only full batches, but random_mini_batches
            # may also yield a final partial batch — the epoch cost is then a
            # slight overestimate of the true per-batch average.
            num_minibatches = int(m / minibatch_size)
            seed = seed + 1  # new shuffle each epoch, still reproducible
            minibatches = random_mini_batches(X_train, Y_train, minibatch_size, seed)
            for minibatch in minibatches:
                # Select a minibatch
                (minibatch_X,minibatch_Y) = minibatch
                _,temp_cost = sess.run([optimizer, cost], feed_dict = {X:minibatch_X, Y:minibatch_Y})
                minibatch_cost += temp_cost / num_minibatches
            if print_cost == True and epoch % 5 == 0:
                print("Cost after epoch %i : %f" % (epoch, minibatch_cost))
            if print_cost == True and epoch % 1 == 0:
                costs.append(minibatch_cost)
        # plot the cost
        #plt.plot(np.squeeze(costs))
        #plt.ylabel("cost")
        #plt.xlabel("iterations (per tens)")
        #plt.title("Lerning ratge =" + str(learning_rate))
        #plt.show()
        # Calculate the correct predictions
        predict_op = tf.argmax(Z3, 1)
        correct_prediction = tf.equal(predict_op, tf.argmax(Y, 1))
        # Calculate accuracy on the test
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        print(accuracy)
        # Evaluate accuracy batch-by-batch; examples beyond the last full
        # minibatch are not counted in either average.
        train_batch_num = int(math.floor(X_train.shape[0] / minibatch_size))
        train_accuracy = 0.
        for i in range(train_batch_num):
            train_accuracy += 1.0 / train_batch_num * accuracy.eval({X: X_train[i * minibatch_size:(i+1)*minibatch_size,:,:,:],Y:Y_train[i * minibatch_size:(i+1)*minibatch_size,:]})
        test_batch_num = int(X_test.shape[0] / minibatch_size)
        test_accuracy = 0.
        for i in range(test_batch_num):
            test_accuracy += 1.0 / test_batch_num * accuracy.eval({X: X_test[i * minibatch_size:(i+1)*minibatch_size,:,:,:],Y:Y_test[i * minibatch_size:(i+1)*minibatch_size,:]})
        print("Train Accuracy:", train_accuracy)
        print("Test Accuracy:", test_accuracy)
        # Persist the trained variables to model/mymodel
        save_model(saver,sess,SAVE_PATH)
        print("Z3's shape:", str(Z3.shape))
        return train_accuracy, test_accuracy, parameters


# Script entry: load data, add the channel axis, scale pixels to [0, 1], train.
image_train, label_train, image_test, label_test = input_data()
image_train = image_train.reshape(image_train.shape[0],image_train.shape[1],image_train.shape[2],1)
image_test = image_test.reshape(image_test.shape[0],image_test.shape[1],image_test.shape[2],1)
image_train = image_train / 255.
image_test = image_test / 255.
model(image_train, label_train, image_test, label_test)
測試部分GenderTest.py:
# AGE import matplotlib.image as img import tensorflow as tf import numpy as np import matplotlib.pyplot as plt from tensorflow.python.framework import ops import math from GenderUtils import create_placeholders,weight_variable,bias_variable,conv2d,max_pool,compute_cost,initialize_parameters,cnn_net np.random.seed(1) tf.reset_default_graph() parameters = initialize_parameters() saver = tf.train.Saver() with tf.Session() as sess: tf.set_random_seed(1) sess.run(tf.global_variables_initializer()) ckpt = tf.train.get_checkpoint_state(checkpoint_dir = 'model/') print(ckpt.model_checkpoint_path) saver.restore(sess,ckpt.model_checkpoint_path) parameters = {"W1":sess.run(parameters["W1"]), "b1":sess.run(parameters["b1"]), "W2":sess.run(parameters["W2"]), "b2":sess.run(parameters["b2"]), "W3":sess.run(parameters["W3"]), "b3":sess.run(parameters["b3"]), "W4":sess.run(parameters["W4"]), "b4":sess.run(parameters["b4"]), "W5":sess.run(parameters["W5"]), "b5":sess.run(parameters["b5"]), "W6":sess.run(parameters["W6"]), "b6":sess.run(parameters["b6"])} #the image inputs is gray image with three channels. image = img.imread("data/T3.bmp") image_test = image[:,:,0] print("image_test shape:", str(image_test.shape)) image = image_test.reshape(1,image_test.shape[0],image_test.shape[1],1) # image = image.reshape(1,image_test.shape[0],image_test.shape[1],1) image = image / 255. imaget = tf.image.convert_image_dtype(image, tf.float32) print("image shape: %", str(imaget.shape)) res = cnn_net(imaget, parameters) print("result: ",sess.run(tf.argmax(res, 1))) print(str(res.shape)) print(res.eval())
結果
由於樣本比較少,訓練結果如下:
Fig.3訓練結果圖
下載連結
最後給出整個工程程式碼及資料集的下載連結:
百度網盤:連結 密碼: 5wst
Github:連結
關於訓練和測試資料讀取的問題,請參考部落格:點選開啟連結