1. 程式人生 > 程式設計 >使用tensorflow框架在Colab上跑通貓狗識別程式碼

使用tensorflow框架在Colab上跑通貓狗識別程式碼

一、 前提:

有Google賬號(具體怎麼註冊賬號這裡不詳述,大家都懂的,自行百度),並且在你的Google郵箱中關聯好colab(怎樣在Google郵箱中使用colab在此不詳述,自行百度)。

二、 現在開始:

因為我們使用的是colab,所以就不必為安裝版本對應的anaconda、python以及tensorflow而苦惱了,經過以下配置就可以直接開始使用了。

使用tensorflow框架在Colab上跑通貓狗識別程式碼

使用tensorflow框架在Colab上跑通貓狗識別程式碼

使用tensorflow框架在Colab上跑通貓狗識別程式碼

在colab中新建程式碼塊,執行以下程式碼來下載需要的資料集

# In this exercise you will train a CNN on the FULL Cats-v-dogs dataset
# This will require you doing a lot of data preprocessing because
# the dataset isn't split into training and validation for you
# This code block has all the required inputs
import os
import zipfile
import random
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from shutil import copyfile
# This code block downloads the full Cats-v-Dogs dataset and stores it as 
# cats-and-dogs.zip. It then unzips it to /tmp
# which will create a tmp/PetImages directory containing subdirectories
# called 'Cat' and 'Dog' (that's how the original researchers structured it)
# If the URL doesn't work,# .  visit https://www.microsoft.com/en-us/download/confirmation.aspx?id=54765
# And right click on the 'Download Manually' link to get a new URL

!wget --no-check-certificate \
  "https://github.com/ADlead/Dogs-Cats/archive/master.zip" \
  -O "/tmp/cats-and-dogs.zip"

local_zip = '/tmp/cats-and-dogs.zip'
zip_ref = zipfile.ZipFile(local_zip,'r')
zip_ref.extractall('/tmp')
zip_ref.close()

執行結果:

在colab中預設安裝TensorFlow1.14,所以會提示讓升級tensorflow,可以不用理會,需要升級為2.0的也可以自行百度去升級。
接下來會提示我們需要的資料集以壓縮包的形式已經下載好了

使用tensorflow框架在Colab上跑通貓狗識別程式碼

使用tensorflow框架在Colab上跑通貓狗識別程式碼

執行以下程式碼來解壓下載好的資料集並把訓練影象集劃分成訓練影象集和測試影象集,分別用於訓練模型和測試模型。把25000張影象劃分成20000張訓練影象和5000張測試影象。深度學習的框架使用的是tensorflow,為了能讓tensorflow分批輸入資料進行訓練,把所有的影象畫素資訊儲存成batch檔案。訓練集100個batch檔案,每個檔案有200張影象。測試集1個batch檔案,共5000張影象。

import cv2 as cv
import os
import numpy as np

import random
import pickle

import time

start_time = time.time()

data_dir = '/tmp/Dogs-Cats-master/data'
batch_save_path = '/tmp/Dogs-Cats-master/batch_files'

# Create the folder that will hold the pickled batch files.
os.makedirs(batch_save_path,exist_ok=True)

# Every image is resized to 100 x 100.
# Training set, 20000 images: 100 batch files of 200 images each.
# Held-out set, 5000 images: one file, later read as 100 batches of 50.

# List the labelled images; both splits are drawn from the 'train' folder.
all_data_files = os.listdir(os.path.join(data_dir,'train/'))

# Shuffle the file order so both splits get a random mix of cats and dogs.
random.shuffle(all_data_files)

all_train_files = all_data_files[:20000]
all_test_files = all_data_files[20000:]


def _load_split(file_names,split_name):
  '''Read, resize and label every image listed in file_names.

  Returns (images, labels, names). The label is taken from the file name:
  0 when it contains 'cat', 1 when it contains 'dog'. split_name ('train'
  or 'test') is only used to build the error message.
  Raises Exception for an unreadable image or a name without cat/dog.
  '''
  images,labels,names = [],[],[]
  for each in file_names:
    img = cv.imread(os.path.join(data_dir,'train/',each),1)
    if img is None:
      # cv.imread reports an unreadable file by returning None; fail here
      # with the file name instead of obscurely inside cv.resize.
      raise Exception('%s could not be read as an image'%(each))
    images.append(np.array(cv.resize(img,(100,100))))
    if 'cat' in each:
      labels.append(0)
    elif 'dog' in each:
      labels.append(1)
    else:
      raise Exception('%s is wrong %s file'%(each,split_name))
    names.append(each)
  return images,labels,names


# Training split
train_data,train_label,train_filenames = _load_split(all_train_files,'train')

# Held-out split (each image is read and resized itself, not reused from
# the training loop)
test_data,test_label,test_filenames = _load_split(all_test_files,'test')

print(len(train_data),len(test_data))

# Write the 100 training batch files.
num_batches = 100
images_per_batch = 200
for num in range(1,num_batches + 1):
  start = (num - 1) * images_per_batch
  end = start + images_per_batch

  all_data = {
    'data':train_data[start: end],
    'label':train_label[start: end],
    'filenames':train_filenames[start: end],
    'name':'training batch {} of {}'.format(num,num_batches),
  }

  with open(os.path.join(batch_save_path,'train_batch_{}'.format(num)),'wb') as f:
    pickle.dump(all_data,f)

# Write the single held-out batch file.
all_test_data = {
  'data':test_data,'label':test_label,'filenames':test_filenames,'name':'test batch 1 of 1'
}

with open(os.path.join(batch_save_path,'test_batch'),'wb') as f:
  pickle.dump(all_test_data,f)


end_time = time.time()
print('製作結束,用時{}秒'.format(end_time - start_time))

執行結果:

使用tensorflow框架在Colab上跑通貓狗識別程式碼

使用tensorflow框架在Colab上跑通貓狗識別程式碼

執行以下編寫卷積層、池化層、全連線層、搭建tensorflow的計算圖、定義佔位符、計算損失函式、預測值、準確率以及訓練部分的程式碼

import tensorflow as tf
import numpy as np
import cv2 as cv
import os
import pickle


''' 全域性引數 '''
# Image dimension used in the network's input placeholder shape.
IMAGE_SIZE = 100
# Learning rate handed to the Adam optimizer.
LEARNING_RATE = 1e-4
# Number of optimisation steps run by myTrain.
TRAIN_STEP = 10000
# Number of examples requested from next_batch per training step.
TRAIN_SIZE = 100
# Number of batches drawn from the held-out set per evaluation pass.
TEST_STEP = 100
# Number of examples requested from next_batch per batch in myTest.
TEST_SIZE = 50

# main() trains when this is True and evaluates otherwise.
IS_TRAIN = True

# Directory the trained checkpoint is written to and restored from.
SAVE_PATH = '/tmp/Dogs-Cats-master/model/'

# Directory holding the pickled train_batch_N / test_batch files.
data_dir = '/tmp/Dogs-Cats-master/batch_files'
# NOTE(review): presumably the unlabeled images meant for final_classify — confirm.
pic_path = '/tmp/Dogs-Cats-master/data/test1'

''''''


def load_data(filename):
  '''Read one pickled batch file.

  Returns the tuple (data, label, filenames) stored under those keys of the
  pickled dict. The file is unpickled as-is, so only load batch files you
  produced yourself.
  '''
  with open(filename,'rb') as batch_file:
    batch = pickle.load(batch_file,encoding='iso-8859-1')
  images = batch['data']
  labels = batch['label']
  names = batch['filenames']
  return images,labels,names

# 讀取資料的類
# In-memory batch reader
class InputData:
  '''Serve fixed-size batches from one or more pickled batch files.

  All files are loaded and concatenated on construction. With need_shuffle
  set, the examples are shuffled once up front and again every time the data
  is exhausted; images, labels and file names always stay aligned.
  '''

  def __init__(self,filenames,need_shuffle):
    '''Load every batch file in filenames.

    filenames: paths of pickled batch files readable by load_data.
    need_shuffle: shuffle now and on every wrap-around when true; when
      false, next_batch raises once the data runs out.
    '''
    all_data = []
    all_labels = []
    all_names = []
    for file in filenames:
      data,labels,filename = load_data(file)

      all_data.append(data)
      all_labels.append(labels)
      all_names += filename

    self._data = np.vstack(all_data)
    self._labels = np.hstack(all_labels)
    print(self._data.shape)
    print(self._labels.shape)

    self._filenames = all_names

    self._num_examples = self._data.shape[0]
    self._need_shuffle = need_shuffle
    self._indicator = 0  # index of the first example of the next batch
    # The initial shuffle depends on the flag, not on the cursor (which is
    # always 0 here and therefore never triggered it).
    if self._need_shuffle:
      self._shuffle_data()

  def _shuffle_data(self):
    '''Apply one random permutation to data, labels and file names alike.'''
    p = np.random.permutation(self._num_examples)
    self._data = self._data[p]
    self._labels = self._labels[p]
    # File names live in a plain list, so reorder them element by element;
    # skipping this would detach the names from their images.
    self._filenames = [self._filenames[i] for i in p]

  def next_batch(self,batch_size):
    '''Return the next (data, labels, filenames) batch of batch_size examples.

    When fewer than batch_size examples remain, the leftovers are dropped and
    reading restarts from a fresh shuffle if need_shuffle is set.
    Raises Exception when the data is exhausted without need_shuffle, or when
    batch_size exceeds the total number of examples.
    '''
    end_indicator = self._indicator + batch_size
    if end_indicator > self._num_examples:
      if not self._need_shuffle:
        raise Exception('have no more examples')
      self._shuffle_data()
      self._indicator = 0
      end_indicator = batch_size
    if end_indicator > self._num_examples:
      raise Exception('batch size is larger than all examples')
    batch_data = self._data[self._indicator : end_indicator]
    batch_labels = self._labels[self._indicator : end_indicator]
    batch_filenames = self._filenames[self._indicator : end_indicator]
    self._indicator = end_indicator
    return batch_data,batch_labels,batch_filenames

# 定義一個類
# Model wrapper
class MyTensor:
  '''Cat/dog CNN classifier built on the TensorFlow 1.x graph API.

  The pickled batches are loaded on construction. flow() builds the graph,
  myTrain()/myTest() run it, and final_classify() sorts unlabeled images into
  per-class folders. main() picks training or testing from IS_TRAIN.
  '''

  def __init__(self):
    '''Load the 100 training batch files and the single held-out batch file.'''
    train_filenames = [os.path.join(data_dir,'train_batch_%d'%i) for i in range(1,101)]
    test_filenames = [os.path.join(data_dir,'test_batch')]
    self.batch_train_data = InputData(train_filenames,True)
    # Shuffling is enabled here as well so that next_batch() wraps around
    # instead of raising once the held-out images are used up.
    self.batch_test_data = InputData(test_filenames,True)

  def flow(self):
    '''Build placeholders, network, loss, metrics, optimizer and saver.'''
    # Input images: (batch, height, width, BGR channels as read by OpenCV).
    self.x = tf.placeholder(tf.float32,[None,IMAGE_SIZE,IMAGE_SIZE,3],'input_data')
    self.y = tf.placeholder(tf.int64,[None],'output_data')
    self.keep_prob = tf.placeholder(tf.float32)

    # Open question left by the original author: should the input be scaled
    # with self.x / 255.0 here?

    # Feed the images through the network to get the class logits.
    fc = self.conv_net(self.x,self.keep_prob)

    self.loss = tf.losses.sparse_softmax_cross_entropy(labels=self.y,logits=fc)
    self.y_ = tf.nn.softmax(fc) # per-class probabilities
    self.predict = tf.argmax(fc,1)
    self.acc = tf.reduce_mean(tf.cast(tf.equal(self.predict,self.y),tf.float32))

    self.train_op = tf.train.AdamOptimizer(LEARNING_RATE).minimize(self.loss)
    self.saver = tf.train.Saver(max_to_keep=1)

    print('計算流圖已經搭建.')

  # Training
  def myTrain(self):
    '''Run TRAIN_STEP optimisation steps, then save the model to SAVE_PATH.

    Progress is printed every 100 steps; every 1000 steps the model is also
    evaluated on TEST_STEP batches of TEST_SIZE held-out images.
    '''
    acc_list = []
    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())

      for i in range(TRAIN_STEP):
        train_data,train_label,_ = self.batch_train_data.next_batch(TRAIN_SIZE)

        loss_val,train_acc,_ = sess.run(
          [self.loss,self.acc,self.train_op],
          feed_dict={self.x:train_data,self.y:train_label,self.keep_prob:0.7}
        )

        acc_list.append(train_acc)
        if (i+1) % 100 == 0:
          acc_mean = np.mean(acc_list)
          print('step:{0},loss:{1:.5},acc:{2:.5},acc_mean:{3:.5}'.format(
            i+1,loss_val,train_acc,acc_mean
          ))
        if (i+1) % 1000 == 0:
          test_acc_list = []
          for j in range(TEST_STEP):
            # TEST_STEP x TEST_SIZE covers the held-out set exactly once;
            # dropout is disabled (keep_prob 1.0) for evaluation.
            test_data,test_label,_ = self.batch_test_data.next_batch(TEST_SIZE)
            acc_val = sess.run(self.acc,feed_dict={
              self.x:test_data,self.y:test_label,self.keep_prob:1.0
            })
            test_acc_list.append(acc_val)
          print('[Test ] step:{0},mean_acc:{1:.5}'.format(
            i+1,np.mean(test_acc_list)
          ))
      # Save the trained model.
      os.makedirs(SAVE_PATH,exist_ok=True)
      self.saver.save(sess,SAVE_PATH + 'my_model.ckpt')

  def myTest(self):
    '''Restore the latest checkpoint and evaluate it image by image.

    Prints one line per held-out image and the mean accuracy at the end.
    Raises Exception when no checkpoint exists under SAVE_PATH.
    '''
    with tf.Session() as sess:
      self._restore_latest(sess)
      test_acc_list = []
      for j in range(TEST_STEP):
        test_data,test_label,test_name = self.batch_test_data.next_batch(TEST_SIZE)
        for each_data,each_label,each_name in zip(test_data,test_label,test_name):
          # Feed a single image as a batch of one.
          acc_val,y__,pre,test_img_data = sess.run(
            [self.acc,self.y_,self.predict,self.x],feed_dict={
              self.x:each_data.reshape(1,IMAGE_SIZE,IMAGE_SIZE,3),
              self.y:each_label.reshape(1),
              self.keep_prob:1.0
            }
          )
          test_acc_list.append(acc_val)

          # Show the result for this image.
          self.compare_test(test_img_data,each_label,pre[0],y__[0],each_name)
      print('[Test ] mean_acc:{0:.5}'.format(np.mean(test_acc_list)))

  def compare_test(self,input_image_arr,input_label,output,probability,img_name):
    '''Print whether one prediction matches its label.

    input_image_arr: the image that was fed in (not used for the report).
    input_label: true class index (0 cat, 1 dog).
    output: predicted class index.
    probability: per-class probabilities, indexable by class index.
    img_name: file name of the image.
    '''
    classes = ['cat','dog']
    if input_label == output:
      result = '正確'
    else:
      result = '錯誤'
    print('測試【{0}】,輸入的label:{1},預測得是{2}:{3}的概率:{4:.5},輸入的圖片名稱:{5}'.format(
      result,input_label,output,classes[output],probability[output],img_name
    ))

  def _restore_latest(self,sess):
    '''Restore the newest checkpoint under SAVE_PATH into sess.'''
    model_file = tf.train.latest_checkpoint(SAVE_PATH)
    if model_file is None:
      # latest_checkpoint returns None when nothing has been saved yet.
      raise Exception('no checkpoint found in %s'%(SAVE_PATH))
    self.saver.restore(sess,save_path=model_file)

  def _conv_block(self,inputs,filters,index):
    '''Two 3x3 same-padded ReLU convolutions followed by 2x2 max pooling.'''
    first = tf.layers.conv2d(inputs,filters,(3,3),padding='same',
                             activation=tf.nn.relu,name='conv%d_1'%index)
    second = tf.layers.conv2d(first,filters,(3,3),padding='same',
                              activation=tf.nn.relu,name='conv%d_2'%index)
    return tf.layers.max_pooling2d(second,(2,2),(2,2),name='pool%d'%index)

  def conv_net(self,x,keep_prob):
    '''Return the two-class logits for the image batch x.

    Four convolution blocks (16, 32, 64, 128 filters) are followed by dense
    layers of 512 and 256 units with dropout, and a final 2-unit layer.
    keep_prob is the dropout keep probability.
    '''
    pool1 = self._conv_block(x,16,1)
    pool2 = self._conv_block(pool1,32,2)
    pool3 = self._conv_block(pool2,64,3)
    pool4 = self._conv_block(pool3,128,4)

    flatten = tf.layers.flatten(pool4) # flatten for the dense layers

    fc1 = tf.layers.dense(flatten,512,tf.nn.relu)
    fc1_dropout = tf.nn.dropout(fc1,keep_prob=keep_prob)
    # Each dense layer consumes the dropped-out activations of the previous
    # one; feeding fc1/fc2 directly would leave dropout without any effect.
    fc2 = tf.layers.dense(fc1_dropout,256,tf.nn.relu)
    fc2_dropout = tf.nn.dropout(fc2,keep_prob=keep_prob)
    fc3 = tf.layers.dense(fc2_dropout,2,None) # output logits

    return fc3

  def main(self):
    '''Build the graph, then train or test depending on IS_TRAIN.'''
    self.flow()
    if IS_TRAIN is True:
      self.myTrain()
    else:
      self.myTest()

  def final_classify(self):
    '''Classify every image under pic_path and file it by predicted class.

    Only runs when IS_TRAIN is False; otherwise it just prints a notice.
    Raises Exception when no checkpoint exists under SAVE_PATH.
    '''
    if IS_TRAIN is not False:
      print('now is training model...')
      return

    # pic_path points at the extracted dataset; a path relative to the
    # working directory would not exist when run from a Colab notebook.
    all_test_files_dir = pic_path
    all_test_filenames = os.listdir(all_test_files_dir)

    self.flow()
    with tf.Session() as sess:
      self._restore_latest(sess)

      for each_filename in all_test_filenames:
        each_data = self.get_img_data(os.path.join(all_test_files_dir,each_filename))
        y__,pre,test_img_data = sess.run(
          [self.y_,self.predict,self.x],feed_dict={
            self.x:each_data.reshape(1,IMAGE_SIZE,IMAGE_SIZE,3),
            self.keep_prob:1.0
          }
        )
        self.classify(test_img_data,pre[0],y__[0],each_filename)

  def classify(self,input_image_arr,output,probability,img_name):
    '''Save one image into ./classiedResult/cat or /dog and print the result.

    input_image_arr: batch of one image; its first element is written out.
    output: predicted class index (0 cat, anything else dog).
    probability: per-class probabilities, indexable by class index.
    img_name: file name used for the saved copy.
    '''
    classes = ['cat','dog']
    single_image = input_image_arr[0] #* 255
    if output == 0:
      output_dir = 'cat/'
    else:
      output_dir = 'dog/'
    os.makedirs(os.path.join('./classiedResult',output_dir),exist_ok=True)
    cv.imwrite(os.path.join('./classiedResult',output_dir,img_name),single_image)
    print('輸入的圖片名稱:{0},預測得有{1:5}的概率是{2}:{3}'.format(
      img_name,probability[output],output,classes[output]
    ))

  # Read an image's pixels by file name
  def get_img_data(self,img_name):
    '''Read the image at img_name and resize it to IMAGE_SIZE x IMAGE_SIZE.

    Raises Exception when the file cannot be read as an image.
    '''
    img = cv.imread(img_name)
    if img is None:
      # cv.imread reports an unreadable file by returning None.
      raise Exception('%s could not be read as an image'%(img_name))
    resized_img = cv.resize(img,(IMAGE_SIZE,IMAGE_SIZE))
    img_data = np.array(resized_img)

    return img_data




if __name__ == '__main__':
  # Build the model wrapper, then train or test according to IS_TRAIN.
  classifier = MyTensor()
  classifier.main()

  # For the final sorting of unlabeled images, call
  # classifier.final_classify() instead of main().

  print('hello world')

執行結果:

使用tensorflow框架在Colab上跑通貓狗識別程式碼

參考:https://www.jianshu.com/p/9ee2533c8adb

程式碼出處:https://github.com/ADlead/Dogs-Cats.git

到此這篇關於使用tensorflow框架在Colab上跑通貓狗識別程式碼的文章就介紹到這了,更多相關tensorflow框架在Colab上跑通貓狗識別內容請搜尋我們以前的文章或繼續瀏覽下面的相關文章,希望大家以後多多支援我們!