Hands-On TensorFlow: Autoencoders
阿新 · Published: 2018-12-10
Introduction to autoencoders: deep learning extracts features that appear frequently. Features must be abstracted step by step: the network starts from the micro-level features it actually sees and raises the level of abstraction layer by layer, gradually moving toward complex macro-level features. Sparse representation of features: a small number of basic features are combined and assembled into higher-level, more abstract features. Hinton's idea was to first run unsupervised pre-training with autoencoders to extract features and initialize the weights, and then use the label information for supervised learning.
The more layers a neural network has, the fewer hidden nodes it needs.
Drawbacks of deeper neural networks: they overfit easily, their parameters are hard to tune, and gradients vanish. A common way to prevent overfitting: ① Dropout: roughly speaking, during training the output nodes of a given layer are randomly discarded; in effect this creates many new random samples.
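Below is a minimal sketch of how dropout could look in TensorFlow 1.x; the tensor names and the keep probability are illustrative assumptions, not part of the autoencoder code later in this post.

import tensorflow as tf

# Output of some hidden layer (shape and name are assumptions for this sketch).
hidden_out = tf.placeholder(tf.float32, [None, 256])
# Probability of keeping each node; fed in at run time.
keep_prob = tf.placeholder(tf.float32)

# tf.nn.dropout zeroes a random fraction (1 - keep_prob) of the elements and
# scales the survivors by 1 / keep_prob, which in effect creates a new random
# sample of the layer's output at every training step.
dropped = tf.nn.dropout(hidden_out, keep_prob)

# Typical usage: feed keep_prob = 0.5 during training and keep_prob = 1.0 when evaluating.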
Vanishing gradients: when a neural network has many layers, the gradient of the Sigmoid function shrinks step by step during backpropagation, so updating the network's parameters from the feedback of the training data becomes extremely slow.
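As a rough numeric illustration (not from the original notes): the derivative of sigmoid(z) is sigmoid(z) * (1 - sigmoid(z)), which never exceeds 0.25, so chaining it through many layers shrinks the gradient multiplicatively.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

grad = 1.0
for _ in range(10):                           # pretend we backpropagate through 10 sigmoid layers
    z = 0.0                                   # best case: the derivative is largest at z = 0
    grad *= sigmoid(z) * (1.0 - sigmoid(z))   # multiply by at most 0.25

print(grad)                                   # about 9.5e-07: the gradient has all but vanished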
Compared with Sigmoid, the main changes ReLU brings are: ① unilateral inhibition, ② a relatively wide excitation boundary, ③ sparse activation.
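A small illustrative comparison of the two activations on the same inputs (the values are made up for this sketch): ReLU suppresses the negative side to exactly zero, which gives unilateral inhibition and sparse activation, while its positive side grows without saturating.

import numpy as np

z = np.array([-3.0, -1.0, 0.0, 1.0, 3.0, 10.0])

sigmoid = 1.0 / (1.0 + np.exp(-z))   # saturates near 0 and 1 at both ends
relu = np.maximum(0.0, z)            # exactly 0 for z < 0, linear for z >= 0

print(sigmoid)   # roughly [0.047 0.269 0.5 0.731 0.953 1.0]
print(relu)      # [ 0.  0.  0.  1.  3. 10.]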
Applications of convolutional neural networks: ① images and video, ② time-series signals, ③ audio signals, ④ text data.
Operations in a convolutional layer: ① Wx + b; ② apply a non-linear activation function (ReLU); ③ pooling, i.e. down-sampling, for example reducing a 2x2 patch to 1x1; max pooling is used most often because it keeps the most salient feature and improves the model's tolerance to distortion; ④ most commonly, an LRN (local response normalization) layer.
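A minimal TensorFlow 1.x sketch of this pipeline; the input shape, kernel size, and LRN parameters are assumptions made for the example and are unrelated to the autoencoder code below.

import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 28, 28, 1])                 # e.g. grayscale 28x28 images
W = tf.Variable(tf.truncated_normal([5, 5, 1, 32], stddev=0.1))        # 32 kernels of size 5x5
b = tf.Variable(tf.zeros([32]))

conv = tf.nn.conv2d(images, W, strides=[1, 1, 1, 1], padding='SAME') + b   # (1) Wx + b
activated = tf.nn.relu(conv)                                               # (2) non-linear activation
pooled = tf.nn.max_pool(activated, ksize=[1, 2, 2, 1],                     # (3) 2x2 max pooling keeps
                        strides=[1, 2, 2, 1], padding='SAME')              #     the strongest response
normalized = tf.nn.lrn(pooled, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)  # (4) local response normalization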
#%%
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np
import sklearn.preprocessing as prep
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data


# Standard uniform-distribution Xavier initializer.
def xavier_init(fan_in, fan_out, constant=1):
    low = -constant * np.sqrt(6.0 / (fan_in + fan_out))
    high = constant * np.sqrt(6.0 / (fan_in + fan_out))
    return tf.random_uniform((fan_in, fan_out),
                             minval=low, maxval=high,
                             dtype=tf.float32)


class AdditiveGaussianNoiseAutoencoder(object):
    # n_input: number of input variables
    # n_hidden: number of hidden-layer nodes
    # transfer_function: activation function of the hidden layer
    # optimizer: optimizer, Adam by default
    # scale: Gaussian noise coefficient
    def __init__(self, n_input, n_hidden, transfer_function=tf.nn.softplus,
                 optimizer=tf.train.AdamOptimizer(), scale=0.1):
        self.n_input = n_input
        self.n_hidden = n_hidden
        self.transfer = transfer_function
        self.scale = tf.placeholder(tf.float32)
        self.training_scale = scale
        network_weights = self._initialize_weights()  # initialize the network parameters
        self.weights = network_weights

        # Model: define the network structure.
        # Placeholder for the input, with dimension n_input.
        self.x = tf.placeholder(tf.float32, [None, self.n_input])
        # Hidden layer that extracts features.
        # First add noise to the input x, i.e. self.x + scale * tf.random_normal((n_input,));
        # the noise level is fed through the self.scale placeholder.
        self.hidden = self.transfer(
            tf.add(tf.matmul(self.x + self.scale * tf.random_normal((n_input,)),
                             self.weights['w1']),
                   self.weights['b1']))
        # Reconstruction: restore the data at the output layer.
        # tf.add    -- element-wise addition
        # tf.matmul -- matrix multiplication
        self.reconstruction = tf.add(tf.matmul(self.hidden, self.weights['w2']),
                                     self.weights['b2'])

        # Cost: squared reconstruction error, summed over all elements.
        self.cost = 0.5 * tf.reduce_sum(
            tf.pow(tf.subtract(self.reconstruction, self.x), 2.0))
        # The training op lets the optimizer minimize self.cost.
        self.optimizer = optimizer.minimize(self.cost)

        init = tf.global_variables_initializer()
        self.sess = tf.Session()
        # Initialize all of the autoencoder's model parameters.
        self.sess.run(init)

    # Initialize the weights.
    def _initialize_weights(self):
        all_weights = dict()
        # w1 is initialized with the xavier_init function defined above.
        all_weights['w1'] = tf.Variable(xavier_init(self.n_input, self.n_hidden))
        all_weights['b1'] = tf.Variable(tf.zeros([self.n_hidden], dtype=tf.float32))
        all_weights['w2'] = tf.Variable(tf.zeros([self.n_hidden, self.n_input], dtype=tf.float32))
        all_weights['b2'] = tf.Variable(tf.zeros([self.n_input], dtype=tf.float32))
        return all_weights

    # Compute the cost and run one training step.
    def partial_fit(self, X):
        # Train on one batch of data and return the current cost.
        cost, opt = self.sess.run((self.cost, self.optimizer),
                                  feed_dict={self.x: X, self.scale: self.training_scale})
        return cost

    def calc_total_cost(self, X):
        # Run only the cost node of the graph (no training step).
        return self.sess.run(self.cost,
                             feed_dict={self.x: X, self.scale: self.training_scale})

    # Return the output of the autoencoder's hidden layer.
    def transform(self, X):
        return self.sess.run(self.hidden,
                             feed_dict={self.x: X, self.scale: self.training_scale})

    # Take a hidden-layer code as input and use the reconstruction layer to
    # restore the extracted high-level features back to the original data.
    def generate(self, hidden=None):
        if hidden is None:
            # Sample a random hidden code of shape (1, n_hidden).
            hidden = np.random.normal(size=(1, self.n_hidden))
        return self.sess.run(self.reconstruction,
                             feed_dict={self.hidden: hidden})

    # Wrap feature extraction and reconstruction into a single call.
    def reconstruct(self, X):
        return self.sess.run(self.reconstruction,
                             feed_dict={self.x: X, self.scale: self.training_scale})

    # Get the hidden layer's weights w1.
    def getWeights(self):
        return self.sess.run(self.weights['w1'])

    # Get the hidden layer's biases b1.
    def getBiases(self):
        return self.sess.run(self.weights['b1'])


mnist = input_data.read_data_sets('MNIST_data', one_hot=True)


# Scale the data to zero mean and unit standard deviation.
def standard_scale(X_train, X_test):
    # Make sure the training and test data use exactly the same scaler.
    preprocessor = prep.StandardScaler().fit(X_train)
    X_train = preprocessor.transform(X_train)
    X_test = preprocessor.transform(X_test)
    return X_train, X_test


# Fetch a random block (batch) of data.
def get_random_block_from_data(data, batch_size):
    start_index = np.random.randint(0, len(data) - batch_size)
    return data[start_index:(start_index + batch_size)]


# Standardize the training and test sets.
X_train, X_test = standard_scale(mnist.train.images, mnist.test.images)

# Total number of training samples.
n_samples = int(mnist.train.num_examples)
# Maximum number of training epochs.
training_epochs = 20
batch_size = 128
display_step = 1

autoencoder = AdditiveGaussianNoiseAutoencoder(
    n_input=784,
    n_hidden=200,
    transfer_function=tf.nn.softplus,
    optimizer=tf.train.AdamOptimizer(learning_rate=0.001),
    scale=0.01)

# Main training loop, one iteration per epoch.
for epoch in range(training_epochs):
    avg_cost = 0.
    # Number of batches per epoch.
    total_batch = int(n_samples / batch_size)
    # Loop over all batches.
    for i in range(total_batch):
        # Fetch one batch of data.
        batch_xs = get_random_block_from_data(X_train, batch_size)
        # Fit training using batch data.
        cost = autoencoder.partial_fit(batch_xs)
        # Compute average loss.
        avg_cost += cost / n_samples * batch_size

    # Display logs per epoch step.
    if epoch % display_step == 0:
        print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))

print("Total cost: " + str(autoencoder.calc_total_cost(X_test)))
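As a small follow-up sketch (not in the original post), the remaining methods of the trained autoencoder could be exercised on the standardized test set like this:

hidden_codes = autoencoder.transform(X_test[:10])   # 10 x 200 hidden-layer features
rebuilt = autoencoder.reconstruct(X_test[:10])      # 10 x 784 reconstructed inputs
w1 = autoencoder.getWeights()                       # 784 x 200 encoder weight matrix
print(hidden_codes.shape, rebuilt.shape, w1.shape)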