The 2-D distribution of the softmax loss function on MNIST
Training part
import numpy as np
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data

# digits 0 to 9, one-hot labels
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# trX, trY, teX, teY = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels

def compute_accuracy(v_xs, v_ys):
    global prediction
    global accuracy
    y_pre = sess.run(prediction, feed_dict={xs: v_xs, keep_prob: 1})
    correct_prediction = tf.equal(tf.argmax(y_pre, 1), tf.argmax(v_ys, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    result = sess.run(accuracy)
    # numpy alternative that avoids adding new ops to the graph on every call:
    # result = np.mean(np.argmax(y_pre, axis=1) == np.argmax(v_ys, axis=1))
    return result

def weight_variable(shape, name='weight'):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial, name=name)

def bias_variable(shape, name='b'):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial, name=name)

def conv2d(x, W):
    # stride [1, x_movement, y_movement, 1]; strides[0] and strides[3] must be 1
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    # stride [1, x_movement, y_movement, 1]
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# three training tricks: 1. learning-rate decay  2. L2 regularization  3. moving average
LEARNING_RATE_BASE = 0.0008    # base learning rate
LEARNING_RATE_DECAY = 0.99     # learning-rate decay rate
REGULARIZATION_RATE = 0.0001   # L2 regularization rate
MOVING_AVERAGE_DECAY = 0.99    # moving-average decay rate
global_step = tf.Variable(0, trainable=False)

# placeholders for the network inputs
xs = tf.placeholder(tf.float32, [None, 784])   # 28x28
ys = tf.placeholder(tf.float32, [None, 10])
keep_prob = tf.placeholder(tf.float32)
x_image = tf.reshape(xs, [-1, 28, 28, 1])      # [n_samples, 28, 28, 1]

## conv1 layer ##
W_conv1 = weight_variable([5, 5, 1, 32])       # patch 5x5, in size 1, out size 32
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)   # output size 28x28x32
h_pool1 = max_pool_2x2(h_conv1)                            # output size 14x14x32

## conv2 layer ##
W_conv2 = weight_variable([5, 5, 32, 64])      # patch 5x5, in size 32, out size 64
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)   # output size 14x14x64
h_pool2 = max_pool_2x2(h_conv2)                            # output size 7x7x64

## fully connected layers ##
W_fc0 = weight_variable([7 * 7 * 64, 128])
b_fc0 = bias_variable([128])
W_fc1 = weight_variable([128, 2])              # 2-D bottleneck layer used for the scatter plot
b_fc1 = bias_variable([2])
W_fc2 = weight_variable([2, 10], name='Weight')
b_fc2 = bias_variable([10], name='Bias')

# moving average
variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
# variables_averages_op = variable_averages.apply(tf.trainable_variables())  # would apply to every variable not marked trainable=False
variables_averages_op = variable_averages.apply([W_fc1, b_fc1, W_fc2, b_fc2])

# [n_samples, 7, 7, 64] ->> [n_samples, 7*7*64]
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc0 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc0) + b_fc0)
h_fc0_drop = tf.nn.dropout(h_fc0, keep_prob)
h_fc1 = tf.nn.relu(tf.matmul(h_fc0_drop, W_fc1) + b_fc1)

## output layer ##
tf.summary.histogram('Weight', W_fc2)
tf.summary.histogram('Bias', b_fc2)
y_conv = tf.matmul(h_fc1, W_fc2) + b_fc2
# prediction = tf.nn.softmax(y_conv)  # prediction uses the moving-average (shadow) weights instead
prediction = tf.nn.softmax(tf.matmul(h_fc1, variable_averages.average(W_fc2))
                           + variable_averages.average(b_fc2))

# the error between prediction and real data
# cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.log(prediction), reduction_indices=[1]))  # loss
# cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y_conv, ys))  # old API argument order
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=ys))

# L2 regularization
regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
regularization = regularizer(W_fc1) + regularizer(W_fc2)
loss = cross_entropy + regularization

# learning-rate decay
learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE, global_step,
                                           mnist.train.num_examples, LEARNING_RATE_DECAY)
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step)
# train_step = tf.train.GradientDescentOptimizer(1e-3).minimize(cross_entropy)

# after every update of W and b, refresh the moving averages
with tf.control_dependencies([train_step, variables_averages_op]):
    train_op = tf.no_op(name='train')

tf.summary.scalar('loss', loss)
merged = tf.summary.merge_all()

# summary writer goes in here
sess = tf.Session()
init = tf.global_variables_initializer()
train_writer = tf.summary.FileWriter("path/to/logs", sess.graph)
# test_writer = tf.summary.FileWriter("logs/test", sess.graph)
saver = tf.train.Saver()
# important step
sess.run(init)

print("begin train")
for i in range(5000):
    # global_step is incremented by the optimizer via minimize(..., global_step=global_step)
    batch_xs, batch_ys = mnist.train.next_batch(100)
    batch_tx, batch_ty = mnist.test.next_batch(100)
    # the two commented lines below can be used instead of the single train_op call:
    # they run the weight update first and then refresh the moving averages
    sess.run(train_op, feed_dict={xs: batch_xs, ys: batch_ys, keep_prob: 0.5})
    # sess.run(train_step, feed_dict={xs: batch_xs, ys: batch_ys, keep_prob: 0.5})
    # sess.run(variables_averages_op)
    if i % 10 == 0:
        train_result = sess.run(merged, feed_dict={xs: batch_xs, ys: batch_ys, keep_prob: 1})
        test_result = sess.run(merged, feed_dict={xs: batch_tx, ys: batch_ty, keep_prob: 1})
        train_writer.add_summary(train_result, i)
        # test_writer.add_summary(test_result, i)
        print(compute_accuracy(batch_tx, batch_ty))

saver.save(sess, "save_path/mnist_2d.module")
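For reference, the two scheduling tricks above follow simple closed-form rules. The sketch below is not part of the original scripts; it is a minimal plain-Python illustration of what tf.train.exponential_decay (with the default staircase=False) and tf.train.ExponentialMovingAverage compute, assuming DECAY_STEPS = 55000 (mnist.train.num_examples, as passed above) and hypothetical step values.

# Minimal sketch (assumption: values mirror the constants used in the training script).
LEARNING_RATE_BASE = 0.0008
LEARNING_RATE_DECAY = 0.99
MOVING_AVERAGE_DECAY = 0.99
DECAY_STEPS = 55000            # mnist.train.num_examples, as passed to exponential_decay

def decayed_learning_rate(step):
    # tf.train.exponential_decay, staircase=False:
    # lr = base * decay_rate ** (global_step / decay_steps)
    return LEARNING_RATE_BASE * LEARNING_RATE_DECAY ** (step / DECAY_STEPS)

def ema_update(shadow, value, step):
    # tf.train.ExponentialMovingAverage with a num_updates argument uses
    # decay = min(MOVING_AVERAGE_DECAY, (1 + step) / (10 + step)), then
    # shadow = decay * shadow + (1 - decay) * value
    decay = min(MOVING_AVERAGE_DECAY, (1.0 + step) / (10.0 + step))
    return decay * shadow + (1.0 - decay) * value

print(decayed_learning_rate(0))      # 0.0008
print(decayed_learning_rate(5000))   # slightly smaller, about 0.000799
print(ema_update(shadow=0.0, value=1.0, step=0))   # 0.9: early on the shadow tracks the new value closely

Because the effective decay starts small and grows toward 0.99, the shadow copies of W_fc2 and b_fc2 used by prediction follow the raw weights quickly at the beginning of training and smooth them more heavily later.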
Testing part
import numpy as np
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data
import matplotlib.pyplot as plt

# digits 0 to 9, one-hot labels
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# trX, trY, teX, teY = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels

def compute_accuracy(v_xs, v_ys):
    global prediction
    global accuracy
    y_pre = sess.run(prediction, feed_dict={xs: v_xs, keep_prob: 1})
    correct_prediction = tf.equal(tf.argmax(y_pre, 1), tf.argmax(v_ys, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    result = sess.run(accuracy)
    # numpy alternative that avoids adding new ops to the graph on every call:
    # result = np.mean(np.argmax(y_pre, axis=1) == np.argmax(v_ys, axis=1))
    return result

def weight_variable(shape, name='weight'):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial, name=name)

def bias_variable(shape, name='b'):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial, name=name)

def conv2d(x, W):
    # stride [1, x_movement, y_movement, 1]; strides[0] and strides[3] must be 1
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    # stride [1, x_movement, y_movement, 1]
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# three training tricks: 1. learning-rate decay  2. L2 regularization  3. moving average
LEARNING_RATE_BASE = 0.0008    # base learning rate
LEARNING_RATE_DECAY = 0.99     # learning-rate decay rate
REGULARIZATION_RATE = 0.0001   # L2 regularization rate
MOVING_AVERAGE_DECAY = 0.99    # moving-average decay rate
global_step = tf.Variable(0, trainable=False)

# placeholders for the network inputs
xs = tf.placeholder(tf.float32, [None, 784])   # 28x28
ys = tf.placeholder(tf.float32, [None, 10])
keep_prob = tf.placeholder(tf.float32)
x_image = tf.reshape(xs, [-1, 28, 28, 1])      # [n_samples, 28, 28, 1]

## conv1 layer ##
W_conv1 = weight_variable([5, 5, 1, 32])       # patch 5x5, in size 1, out size 32
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)   # output size 28x28x32
h_pool1 = max_pool_2x2(h_conv1)                            # output size 14x14x32

## conv2 layer ##
W_conv2 = weight_variable([5, 5, 32, 64])      # patch 5x5, in size 32, out size 64
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)   # output size 14x14x64
h_pool2 = max_pool_2x2(h_conv2)                            # output size 7x7x64

## fully connected layers ##
W_fc0 = weight_variable([7 * 7 * 64, 128])
b_fc0 = bias_variable([128])
W_fc1 = weight_variable([128, 2])              # 2-D bottleneck layer used for the scatter plot
b_fc1 = bias_variable([2])
W_fc2 = weight_variable([2, 10], name='Weight')
b_fc2 = bias_variable([10], name='Bias')

# moving average
variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
# variables_averages_op = variable_averages.apply(tf.trainable_variables())  # would apply to every variable not marked trainable=False
variables_averages_op = variable_averages.apply([W_fc1, b_fc1, W_fc2, b_fc2])

# [n_samples, 7, 7, 64] ->> [n_samples, 7*7*64]
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc0 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc0) + b_fc0)
h_fc0_drop = tf.nn.dropout(h_fc0, keep_prob)
h_fc1_ = tf.matmul(h_fc0_drop, W_fc1) + b_fc1   # pre-activation 2-D features, kept for plotting
h_fc1 = tf.nn.relu(h_fc1_)

## output layer ##
tf.summary.histogram('Weight', W_fc2)
tf.summary.histogram('Bias', b_fc2)
y_conv = tf.matmul(h_fc1, W_fc2) + b_fc2
# prediction = tf.nn.softmax(y_conv)  # prediction uses the moving-average (shadow) weights instead
prediction = tf.nn.softmax(tf.matmul(h_fc1, variable_averages.average(W_fc2))
                           + variable_averages.average(b_fc2))

# the error between prediction and real data
# cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.log(prediction), reduction_indices=[1]))  # loss
# cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y_conv, ys))  # old API argument order
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=ys))

# L2 regularization
regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
regularization = regularizer(W_fc1) + regularizer(W_fc2)
loss = cross_entropy + regularization

# learning-rate decay
learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE, global_step,
                                           mnist.train.num_examples, LEARNING_RATE_DECAY)
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step)
# train_step = tf.train.GradientDescentOptimizer(1e-3).minimize(cross_entropy)

# after every update of W and b, refresh the moving averages
with tf.control_dependencies([train_step, variables_averages_op]):
    train_op = tf.no_op(name='train')

tf.summary.scalar('loss', loss)
merged = tf.summary.merge_all()

# summary writer goes in here
sess = tf.Session()
init = tf.global_variables_initializer()
train_writer = tf.summary.FileWriter("path/to/logs", sess.graph)
# test_writer = tf.summary.FileWriter("logs/test", sess.graph)
# important step
sess.run(init)

# restore the checkpoint written by the training script
saver = tf.train.Saver()
saver.restore(sess, "save_path/mnist_2d.module")

sample = 20000   # the MNIST test set has 10,000 images, so a batch of 20,000 re-uses each image about twice
batch_tx, batch_ty = mnist.test.next_batch(sample)
test_result = sess.run(h_fc1_, feed_dict={xs: batch_tx, ys: batch_ty, keep_prob: 1})
print(compute_accuracy(batch_tx, batch_ty))
# print(test_result)

batch_ty = np.argmax(batch_ty, axis=1)
print(batch_ty[0:10])

fig = plt.figure()
ax1 = fig.add_subplot(111)
ax1.set_title('Scatter Plot')   # plot title
plt.xlabel('X')                 # X-axis label
plt.ylabel('Y')                 # Y-axis label

# scatter plot: one colour per digit
# 0 pink, 1 orange, 2 green, 3 cyan, 4 red, 5 yellow, 6 gray, 7 purple, 8 black, 9 blue
cValue = ['pink', 'orange', 'g', 'cyan', 'r', 'y', 'gray', 'purple', 'black', 'b']
for i in range(0, 10):
    x = []
    y = []
    for j in range(0, sample):
        if i == batch_ty[j]:
            x.append(test_result[j, 0])
            y.append(test_result[j, 1])
            # if (test_result[j, 0] + test_result[j, 1]) < 10:
            #     im = np.array(batch_tx[0])
            #     im = im.reshape(28, 28)
            #     plt.imshow(im, cmap='gray')
            #     plt.show()
    ax1.scatter(x, y, c=cValue[i], marker='.', label=str(i))
# legend mapping each digit to its colour
plt.legend()
# show the plot
plt.show()
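As a quick sanity check on the scatter plot, the class separation in the 2-D feature space can also be summarised numerically. The snippet below is a hypothetical add-on, not part of the original post; it assumes test_result (shape [sample, 2]) and the argmax-ed batch_ty from the test script above are available, and prints the mean 2-D feature per digit.

# Hypothetical add-on: per-class centroids of the 2-D features plotted above.
# Assumes test_result and batch_ty (after np.argmax) from the test script.
import numpy as np

for digit in range(10):
    mask = (batch_ty == digit)          # rows belonging to this digit
    if mask.any():
        centroid = test_result[mask].mean(axis=0)
        print(digit, centroid)

Well-separated centroids (and tight clusters around them) correspond to the distinct colour blobs in the figure; digits whose centroids sit close together are the ones the softmax layer confuses most often.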