《面向機器智慧的TensorFlow實踐》StanfordDog完整修改
阿新 • • 發佈:2019-01-07
以下轉載自部落格:https://blog.csdn.net/fnhc462354756/article/details/79872994
主要是為了備份。當然,他的程式碼參考了我之前的一篇部落格,註釋都還沒變,哈哈。
《面向機器智慧的TensorFlow實踐》深入淺出,將tensorflow的很多概念講得很清楚,很適合tensorflow的初學者學習。該書完整的程式碼可以在 https://github.com/backstopmedia/tensorflowbook 下載到。
在學習Stanford Dogs專案時,發現很多部落格都沒能很好地解決最後的準確率問題。然後我仔細研究,發現了其中的問題,具體請參考我的github: https://github.com/Alex-AI-Du/Tensorflow-Tutorial/tree/master/standford_dog
如有問題可以聯絡我
""" Note:2018.3.30 """
import tensorflow as tf
import glob
from itertools import groupby
from collections import defaultdict
from PIL import Image
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # hide TensorFlow's noisy warnings

IMAGE_WIDTH = 256
IMAGE_HEIGHT = 256
# A new TFRecord file is started every this many images.
IMAGES_PER_RECORD_FILE = 1000

# NOTE: the original script also opened a tf.InteractiveSession() here. It was
# only needed by a TensorFlow-based decoding path that had been disabled in
# favour of PIL, so the session is no longer created.

# Collect every image whose breed directory matches the pattern.
image_filenames = glob.glob(r"G:\AI\Images\n02*\*.jpg")

# defaultdict(list) lets us append to a breed's list without creating it first.
training_dataset = defaultdict(list)
testing_dataset = defaultdict(list)


def _breed_of(filename):
    """Return the name of the directory that directly contains `filename`."""
    return os.path.basename(os.path.dirname(filename))


# (breed, filename) pairs. They are sorted because itertools.groupby only
# merges *consecutive* items with the same key and glob gives no ordering
# guarantee; sorting also makes the train/test split reproducible.
image_filename_with_breed = sorted(
    (_breed_of(filename), filename) for filename in image_filenames)

# Group the images by breed (element 0 of each pair).
for dog_breed, breed_images in groupby(image_filename_with_breed, lambda x: x[0]):
    # Every fifth image of a breed goes to the test set (~20%), the rest to
    # the training set (~80%).
    for i, breed_image in enumerate(breed_images):
        if i % 5 == 0:
            testing_dataset[dog_breed].append(breed_image[1])
        else:
            training_dataset[dog_breed].append(breed_image[1])

    # Sanity check: each breed must have at least 18% of its images in the
    # test set.
    breed_training_count = len(training_dataset[dog_breed])
    breed_testing_count = len(testing_dataset[dog_breed])

    assert round(breed_testing_count / (breed_training_count + breed_testing_count), 2) > 0.18, "Not enough testing images."


def _encode_image(image_filename):
    """Load an image, force 3-channel RGB, resize it and return its raw bytes."""
    with Image.open(image_filename) as image:
        # convert("RGB") guarantees exactly three channels per pixel, so every
        # record holds IMAGE_WIDTH * IMAGE_HEIGHT * 3 bytes even when the
        # source file is grayscale, palette-based, CMYK or has an alpha channel.
        resized = image.convert("RGB").resize((IMAGE_WIDTH, IMAGE_HEIGHT))
        return resized.tobytes()


def _make_example(image_label, image_bytes):
    """Build a tf.train.Example with a bytes 'label' and a bytes 'image' feature."""
    return tf.train.Example(
        features=tf.train.Features(feature={
            'label':
            tf.train.Feature(bytes_list=tf.train.BytesList(
                value=[image_label])),
            'image':
            tf.train.Feature(bytes_list=tf.train.BytesList(
                value=[image_bytes]))
        }))


def write_records_file(dataset, record_location):
    """
    Fill TFRecords files with the images found in `dataset` and include their category.

    Output files are named "<record_location>-<index>.tfrecords", where
    <index> is the number of images written before the file was opened.

    Parameters
    ----------
    dataset : dict(list)
      Dictionary with each key being a label for the list of image filenames of its value.
    record_location : str
      Path prefix of the TFRecord output files (directory plus file-name stem).
    """
    # `record_location` is a file-name prefix, so the directory that has to
    # exist is its parent, not a directory named after the prefix itself.
    output_dir = os.path.dirname(record_location)
    if output_dir and not os.path.exists(output_dir):
        print("目錄 %s 不存在,自動建立中..." % (output_dir))
        os.makedirs(output_dir)

    writer = None

    # The running image count decides when to roll over to a new output file,
    # which keeps individual record files from growing too large.
    current_index = 0
    try:
        for breed, images_filenames in dataset.items():
            # Labels are stored as UTF-8 bytes. An integer class index or a
            # one-hot vector would be more compact, but the reader side
            # expects the breed string.
            image_label = breed.encode("utf-8")

            for image_filename in images_filenames:
                if current_index % IMAGES_PER_RECORD_FILE == 0:
                    if writer:
                        writer.close()

                    record_filename = "{record_location}-{current_index}.tfrecords".format(
                        record_location=record_location,
                        current_index=current_index)

                    writer = tf.python_io.TFRecordWriter(record_filename)
                current_index += 1

                # PIL is used instead of tf.read_file/tf.image.decode_jpeg:
                # the original author measured it as roughly 10x faster here.
                image_bytes = _encode_image(image_filename)

                example = _make_example(image_label, image_bytes)
                # SerializeToString() turns the Example into a binary string.
                writer.write(example.SerializeToString())
    finally:
        # `writer` is still None when the dataset is empty; closing in
        # `finally` also releases the file if an image fails to load.
        if writer:
            writer.close()


# Write the test and training sets to their own TFRecord files.
write_records_file(testing_dataset, "F:/TS/TS_p_c/output/testing-images/testing-image")
write_records_file(training_dataset, "F:/TS/TS_p_c/output/training-images/training-image")
""" Note:2018.3.30 """
import tensorflow as tf
from tensorflow.python.ops import random_ops
import math
import numpy as np
import glob
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # hide TensorFlow's noisy warnings

BATCH_SIZE = 10
IMAGE_WIDTH = 256
IMAGE_HEIGHT = 256
IMAGE_CHANNEL = 3


# ------------------------------ image pre-processing ------------------------------
def read_tfrecord(serialized, batch_size):
    """Parse one serialized Example and return shuffled (image, label) batches.

    Parameters
    ----------
    serialized : scalar string tensor
      One serialized tf.train.Example with bytes features 'label' and 'image'.
    batch_size : int
      Number of examples per returned batch.

    Returns
    -------
    (image_batch, label_batch) as produced by tf.train.shuffle_batch; both
    come from the same dequeue, so they are only paired with each other when
    evaluated in the same session.run call.
    """
    # parse_single_example only splits the record into its features; it does
    # not decode the image payload.
    features = tf.parse_single_example(
        serialized,
        features={
            'label': tf.FixedLenFeature([], tf.string),
            'image': tf.FixedLenFeature([], tf.string),
        })

    # The image is stored as raw uint8 bytes (values 0..255).
    record_image = tf.decode_raw(features['image'], tf.uint8)
    # assumes the bytes are row-major (height, width, channel) - confirm
    # against the writer; with 256x256 images the axis order is equivalent.
    image = tf.reshape(record_image,
                       [IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNEL])

    label = tf.cast(features['label'], tf.string)

    # Rule-of-thumb sizing for the shuffle buffer.
    min_after_dequeue = 1000
    capacity = min_after_dequeue + 3 * batch_size

    image_batch, label_batch = tf.train.shuffle_batch(
        [image, label],
        batch_size=batch_size,
        capacity=capacity,
        min_after_dequeue=min_after_dequeue)
    return image_batch, label_batch


def convert_image(image_batch):
    """Convert a uint8 image batch to float32 via tf.image.convert_image_dtype."""
    return (tf.image.convert_image_dtype(image_batch, tf.float32))


def find_index_label(label_batch):
    """Map every label string in `label_batch` to its index in `labels_all`.

    Reads the module-level `labels_all`, which is assigned in the
    `__main__` section before this function is called.
    NOTE(review): a label missing from `labels_all` presumably fails at run
    time because the first match is indexed unconditionally - confirm.
    """
    return (tf.map_fn(lambda l: tf.where(tf.equal(labels_all, l))[0, 0:1][0],
                      label_batch, dtype=tf.int64))


# ---------------------------------- build the CNN ----------------------------------
# Placeholders. The batch dimension is fixed to BATCH_SIZE because the
# flatten step below reshapes to [BATCH_SIZE, -1].
image_holder = tf.placeholder(
    tf.float32, [BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNEL])
label_holder = tf.placeholder(tf.int64, [BATCH_SIZE])
keep_prob_holder = tf.placeholder(tf.float32)  # dropout keep probability


def weights_initializer_random_normal(shape, dtype=tf.float32, partition_info=None):
    """Initializer for the conv layers' `weights_initializer` argument.

    Passing tf.random_normal directly fails with "got an unexpected keyword
    argument 'partition_info'", so this wrapper accepts and ignores the extra
    arguments.
    """
    return random_ops.random_normal(shape)


# Convolution 1 (tf.contrib.layers high-level layer rather than tf.nn.conv2d).
with tf.name_scope("conv1"):
    conv2d_layer_one = tf.contrib.layers.convolution2d(
        image_holder,
        num_outputs=32,  # number of filters
        kernel_size=(5, 5),
        activation_fn=tf.nn.relu,
        weights_initializer=weights_initializer_random_normal,
        stride=(2, 2),
        trainable=True)

# Pooling 1
with tf.name_scope("pool1"):
    pool_layer_one = tf.nn.max_pool(
        conv2d_layer_one,
        ksize=[1, 2, 2, 1],
        strides=[1, 2, 2, 1],
        padding='SAME')

# Convolution 2
with tf.name_scope("conv2"):
    conv2d_layer_two = tf.contrib.layers.convolution2d(
        pool_layer_one,
        num_outputs=64,
        kernel_size=(5, 5),
        activation_fn=tf.nn.relu,
        weights_initializer=weights_initializer_random_normal,
        stride=(1, 1),
        trainable=True)

# Pooling 2
with tf.name_scope("pool2"):
    pool_layer_two = tf.nn.max_pool(
        conv2d_layer_two,
        ksize=[1, 2, 2, 1],
        strides=[1, 2, 2, 1],
        padding='SAME')

# Flatten each example to one feature vector.
with tf.name_scope("flat"):
    flattened_layer_two = tf.reshape(pool_layer_two, [BATCH_SIZE, -1])

# Fully connected 1. The initializer object takes the weight shape from the
# layer, so the shape no longer has to be hard-coded ([65536, 1024] before)
# and stays correct if the image size or the conv stack changes.
with tf.name_scope("full_connect1"):
    hidden_layer_three = tf.contrib.layers.fully_connected(
        flattened_layer_two,
        1024,
        weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
        activation_fn=tf.nn.relu)

# Dropout on the first fully connected layer.
hidden_layer_three = tf.nn.dropout(hidden_layer_three, keep_prob_holder)

# Fully connected 2: one output per breed (120 classes).
with tf.name_scope("full_connect2"):
    final_fully_connected = tf.contrib.layers.fully_connected(
        hidden_layer_three,
        120,
        weights_initializer=tf.truncated_normal_initializer(stddev=0.1))

# Output
with tf.name_scope("output"):
    logits = final_fully_connected
    # True where the top-1 prediction equals the fed label.
    top_k_op = tf.nn.in_top_k(logits, label_holder, 1)


# --------------------------------------- loss ---------------------------------------
def loss(logits, labels):
    """Return the mean sparse softmax cross-entropy of `logits` vs `labels`.

    `logits` must be the raw layer output: the TensorFlow op applies softmax
    itself.
    """
    return tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=labels))


# ------------------------------------- training -------------------------------------
def training(loss_value, learning_rate, batch):
    """Return an Adam (beta1=0.9) step minimizing `loss_value`; `batch` is the global step."""
    return tf.train.AdamOptimizer(learning_rate, 0.9).minimize(
        loss_value, global_step=batch)


# --------------------------------------- main ---------------------------------------
if __name__ == '__main__':
    # Every entry under the image root is a breed directory such as
    # "n02085620-Chihuahua". Sorted so that class indices are reproducible.
    glob_path = sorted(glob.glob(r"G:\AI\Images\*"))
    labels_all = [os.path.basename(path) for path in glob_path]

    # File-name queues feeding the record readers.
    filename_queue_train = tf.train.string_input_producer(
        tf.train.match_filenames_once("F:/TS/TS_p_c/output/training-images/*.tfrecords"))
    filename_queue_test = tf.train.string_input_producer(
        tf.train.match_filenames_once("F:/TS/TS_p_c/output/testing-images/*.tfrecords"))

    # One reader per queue: a reader keeps track of the file it is currently
    # working through, so sharing a single reader between both queues can mix
    # training and test records.
    reader_train = tf.TFRecordReader()
    reader_test = tf.TFRecordReader()
    _, serialized_train = reader_train.read(filename_queue_train)
    _, serialized_test = reader_test.read(filename_queue_test)

    # Training input
    train_image_batch, train_label_batch = read_tfrecord(
        serialized_train, BATCH_SIZE)
    train_images_op = convert_image(train_image_batch)
    train_labels_op = find_index_label(train_label_batch)

    # Test input
    test_image_batch, test_label_batch = read_tfrecord(serialized_test,
                                                       BATCH_SIZE)
    test_images_op = convert_image(test_image_batch)
    test_labels_op = find_index_label(test_label_batch)

    batch = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(
        0.01, batch * 3, 120, 0.95, staircase=True)

    # The loss is built on the label placeholder, not on the queue tensor:
    # evaluating the queue tensor would dequeue a fresh batch whose labels do
    # not belong to the images fed through image_holder.
    loss_op = loss(logits, label_holder)
    train_op = training(loss_op, learning_rate, batch)

    sess = tf.InteractiveSession()
    # Both global and local variables must be initialized (match_filenames_once
    # uses a local variable); otherwise the shuffle queue closes empty and
    # raises OutOfRangeError.
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    sess.run(init_op)

    # Coordinator for the queue-runner threads.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        # ------------------------------ training ------------------------------
        for j in range(100):
            # Fetch images and labels in ONE run call so they come from the
            # same dequeued batch; two separate calls return unrelated batches.
            train_images, train_labels = sess.run(
                [train_images_op, train_labels_op])

            sess.run(
                train_op,
                feed_dict={
                    image_holder: train_images,
                    label_holder: train_labels,
                    keep_prob_holder: 0.5
                })

            if j % 10 == 0:
                # Report the loss on the same batch with dropout disabled.
                print("loss = ", sess.run(
                    loss_op,
                    feed_dict={
                        image_holder: train_images,
                        label_holder: train_labels,
                        keep_prob_holder: 1
                    }), 't=', j)

        # ------------------------------- testing -------------------------------
        num_examples = 1000  # number of test examples to evaluate
        num_iter = int(math.ceil(num_examples / BATCH_SIZE))
        total_sample_count = num_iter * BATCH_SIZE
        true_count = 0
        step = 0
        while step < num_iter:
            # Same pairing rule as in training: one run call per batch.
            test_images, test_labels = sess.run(
                [test_images_op, test_labels_op])
            prediction = sess.run(
                top_k_op,
                feed_dict={
                    image_holder: test_images,
                    label_holder: test_labels,
                    keep_prob_holder: 1.0
                })
            true_count += np.sum(prediction)
            step += 1
            tem_prediction = true_count / (step * BATCH_SIZE)
            if step % 10 == 0:
                print("第", step, "輪測試,準確率為:%.3f, 其中top_1為: %d" % (tem_prediction, np.sum(prediction)))

        predictions = true_count / total_sample_count
        print("總準確率為:%.3f" % predictions)
    finally:
        # Always stop and join the queue-runner threads, even on failure.
        coord.request_stop()
        coord.join(threads)
        sess.close()