將自己手動標註的資料集(PascalVOC格式)轉化為.TFRecord格式
阿新 • • 發佈:2018-12-29
“ 一個人如果不能學會遺忘,那將是很痛苦的事,別再自尋煩惱,快把痛苦的事給忘了吧!”
為了能夠使用Object Detection API~
需要將資料集格式轉化為.TFRecord再進行訓練~
至於,
如何使用Tensorflow官方的Object Detection API
包括下載、依賴(protobuf等)安裝、跑demo、訓練自己的資料過程~
推薦幾篇博文: 1.https://blog.csdn.net/rookie_wei/article/details/81143814
2.
3.https://blog.csdn.net/rookie_wei/article/details/81275663
整個過程比較詳細,可以參考~
本篇主要介紹如何將已標註好的資料集轉化成Tensorflow通用的.TFRecord格式~
注意:本程式中的 VOC_LABELS 是我自己檢測任務用的6個類別(含背景 none),請根據自己的資料集修改!
#-*- coding=utf-8 -*-
# File Name: Create_TFRecord.py
# Author: HZ
# Created Time: 2018-06-06
"""Convert a hand-labelled dataset in Pascal VOC layout into a .tfrecord file.

The dataset directory is expected to contain an ``Annotations/`` folder with
one XML file per image and a ``JPEGImages/`` folder with the matching
``.jpg`` files.
"""
import os
import sys
import random

import numpy as np
import tensorflow as tf
import xml.etree.ElementTree as ET  # XML parsing

# Label table: six entries here (background included); adapt to your data.
VOC_LABELS = {
    'none': (0, 'Background'),
    'person': (1, 'Person'),
    'car': (2, 'Car'),
    'bus': (3, 'Bus'),
    'truck': (4, 'Truck'),
    'cyclist': (5, 'cyclist')
}

# Sub-folders holding the annotations and the images.
DIRECTORY_ANNOTATIONS = 'Annotations/'
DIRECTORY_IMAGES = 'JPEGImages/'

# Seed used when the file list is shuffled.
RANDOM_SEED = 4242


# Helpers that wrap a value (or list of values) into a tf.train.Feature.
def int64_feature(value):
    """Return an int64 Feature; a non-list value is wrapped in a list."""
    if not isinstance(value, list):
        value = [value]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))


def float_feature(value):
    """Return a float Feature; a non-list value is wrapped in a list."""
    if not isinstance(value, list):
        value = [value]
    return tf.train.Feature(float_list=tf.train.FloatList(value=value))


def bytes_feature(value):
    """Return a bytes Feature; a non-list value is wrapped in a list."""
    if not isinstance(value, list):
        value = [value]
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))


def _read_int_tag(node, tag, default=0):
    """Return the integer text of child ``tag`` of ``node``, or ``default``.

    The child is compared against ``None`` explicitly: an ElementTree element
    without children is falsy, so a plain truth test would always take the
    "missing" branch for leaf tags such as <difficult>1</difficult>.
    """
    child = node.find(tag)
    if child is None or child.text is None:
        return default
    return int(child.text)


# Image processing
def _process_image(directory, name):
    """Read one image and its XML annotation.

    Args:
        directory: dataset root containing the image and annotation folders.
        name: file name without extension, shared by the .jpg and the .xml.

    Returns:
        Tuple ``(image_data, shape, bboxes, labels, labels_text, difficult,
        truncated)``. ``image_data`` is the raw file content, ``shape`` is
        ``[height, width, depth]`` from the XML, and each bbox is
        ``(ymin, xmin, ymax, xmax)`` divided by the image height/width.
        All per-object lists have the same length.
    """
    # Read the image file (closed again as soon as the bytes are read).
    filename = os.path.join(directory, DIRECTORY_IMAGES, name + '.jpg')
    with tf.gfile.GFile(filename, 'rb') as image_file:
        image_data = image_file.read()

    # Read the XML annotation file.
    filename = os.path.join(directory, DIRECTORY_ANNOTATIONS, name + '.xml')
    tree = ET.parse(filename)
    root = tree.getroot()

    # Image shape.
    size = root.find('size')
    shape = [int(size.find('height').text),
             int(size.find('width').text),
             int(size.find('depth').text)]

    # Find annotations.
    bboxes = []
    labels = []
    labels_text = []
    difficult = []
    truncated = []
    for obj in root.findall('object'):
        bbox = obj.find('bndbox')
        ymin = float(bbox.find('ymin').text) / shape[0]
        xmin = float(bbox.find('xmin').text) / shape[1]
        ymax = float(bbox.find('ymax').text) / shape[0]
        xmax = float(bbox.find('xmax').text) / shape[1]
        # Same acceptance rule as before (normalised extent below 1 on both
        # axes), but a rejected box now drops the whole object so labels and
        # flags stay aligned with the boxes.
        if not (abs(ymax - ymin) < 1 and abs(xmax - xmin) < 1):
            continue

        label = obj.find('name').text
        labels.append(int(VOC_LABELS[label][0]))
        labels_text.append(label.encode('ascii'))  # stored as ASCII bytes
        difficult.append(_read_int_tag(obj, 'difficult'))
        truncated.append(_read_int_tag(obj, 'truncated'))
        bboxes.append((ymin, xmin, ymax, xmax))
    return image_data, shape, bboxes, labels, labels_text, difficult, truncated


# Build the example
def _convert_to_example(image_data, labels, labels_text, bboxes, shape,
                        difficult, truncated):
    """Pack one image and its annotations into a tf.train.Example.

    Args:
        image_data: raw bytes of the JPEG file.
        labels: list of integer class ids.
        labels_text: list of class names as bytes.
        bboxes: list of ``(ymin, xmin, ymax, xmax)`` tuples.
        shape: ``[height, width, channels]``.
        difficult: list of integer flags, one per object.
        truncated: list of integer flags, one per object.

    Returns:
        The populated tf.train.Example.
    """
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    # Unpacking enforces that every box has exactly four coordinates.
    for y0, x0, y1, x1 in bboxes:
        ymin.append(y0)
        xmin.append(x0)
        ymax.append(y1)
        xmax.append(x1)

    image_format = b'JPEG'
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/label': int64_feature(labels),
        'image/object/bbox/label_text': bytes_feature(labels_text),
        'image/object/bbox/difficult': int64_feature(difficult),
        'image/object/bbox/truncated': int64_feature(truncated),
        'image/format': bytes_feature(image_format),
        'image/encoded': bytes_feature(image_data)}))
    return example


# Append to the tfrecord
def _add_to_tfrecord(dataset_dir, name, tfrecord_writer):
    """Load image ``name`` from ``dataset_dir`` and write it to the writer."""
    image_data, shape, bboxes, labels, labels_text, difficult, truncated = \
        _process_image(dataset_dir, name)
    example = _convert_to_example(image_data, labels, labels_text,
                                  bboxes, shape, difficult, truncated)
    tfrecord_writer.write(example.SerializeToString())


# ``name`` is the prefix of the generated file
def _get_output_filename(output_dir, name, idx):
    """Return ``<output_dir>/<name>_<idx as 3 digits>.tfrecord``."""
    return '%s/%s_%03d.tfrecord' % (output_dir, name, idx)


def run(dataset_dir, output_dir, name='voc_train', shuffling=False):
    """Convert every annotated image under ``dataset_dir`` to a TFRecord.

    Args:
        dataset_dir: dataset root (contains Annotations/ and JPEGImages/).
        output_dir: directory receiving the .tfrecord file; created if absent.
        name: prefix of the output file name.
        shuffling: when True, shuffle the file order with RANDOM_SEED.
    """
    # The writer needs the *output* directory to exist; the input directory
    # must already be there, creating it would only hide a wrong path.
    if not tf.gfile.Exists(output_dir):
        tf.gfile.MakeDirs(output_dir)

    path = os.path.join(dataset_dir, DIRECTORY_ANNOTATIONS)
    # Sorted for a deterministic order; only annotation files are considered.
    filenames = sorted(f for f in os.listdir(path) if f.endswith('.xml'))
    # Shuffle the order when shuffling is True.
    if shuffling:
        random.seed(RANDOM_SEED)
        random.shuffle(filenames)

    total = len(filenames)
    if total:
        # All examples go into a single file with index 0.
        tf_filename = _get_output_filename(output_dir, name, 0)
        with tf.python_io.TFRecordWriter(tf_filename) as tfrecord_writer:
            for position, filename in enumerate(filenames, 1):
                sys.stdout.write(' Converting image %d/%d \n' % (position, total))
                sys.stdout.flush()
                img_name = os.path.splitext(filename)[0]
                _add_to_tfrecord(dataset_dir, img_name, tfrecord_writer)

    print('\nFinished converting the Pascal VOC dataset!')


# Source dataset path, output path and output file prefix.
dataset_dir = "./VOC2007/"
output_dir = "./TFRecords"
name = "voc_train"


def main(_):
    """Entry point invoked by tf.app.run()."""
    run(dataset_dir, output_dir, name)


if __name__ == '__main__':
    tf.app.run()
在獲得訓練好的模型,進行檢測時的demo.py如下: (較好)
#encoding:utf-8
import tensorflow as tf
import numpy as np
import os
from matplotlib import pyplot as plt
from PIL import Image
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_utils
# Directory of the downloaded model.
MODEL_DIR = 'object_detection/ssd_mobilenet_v1_coco_2018_01_28'
# Frozen graph file of the downloaded model.
MODEL_CHECK_FILE = os.path.join(MODEL_DIR, 'frozen_inference_graph.pb')
# Label map that corresponds to the dataset.
MODEL_LABEL_MAP = os.path.join('object_detection/data', 'mscoco_label_map.pbtxt')
# Number of classes in the dataset; see mscoco_label_map.pbtxt.
MODEL_NUM_CLASSES = 90
# Collect the paths of the sample images (image1.jpg .. image5.jpg).
PATH_TO_TEST_IMAGES_DIR = 'object_detection/test_images'
TEST_IMAGES_PATHS = [os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(1, 6)]
# Size of the displayed figure, in inches.
IMAGE_SIZE = (12, 8)

tf.reset_default_graph()
# Load the model into the default graph.
with tf.gfile.GFile(MODEL_CHECK_FILE, 'rb') as fd:
    _graph = tf.GraphDef()
    _graph.ParseFromString(fd.read())
tf.import_graph_def(_graph, name='')

# Load the COCO labels, converting the content of mscoco_label_map.pbtxt into
# {1: {'id': 1, 'name': u'person'}...90: {'id': 90, 'name': u'toothbrush'}}
label_map = label_map_util.load_labelmap(MODEL_LABEL_MAP)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=MODEL_NUM_CLASSES)
category_index = label_map_util.create_category_index(categories)
# Convert an image into a numpy array.
def load_image_into_numpy_array(image):
    """Return ``image`` as a ``(height, width, 3)`` uint8 array.

    Args:
        image: an image object exposing ``size`` as ``(width, height)``,
            ``mode``, ``getdata()`` and ``convert(mode)``.

    Returns:
        A new numpy array of dtype uint8 with three channels per pixel.
    """
    # The reshape below needs exactly three values per pixel, so any other
    # mode (grayscale, RGBA, palette, ...) is converted to RGB first.
    if image.mode != 'RGB':
        image = image.convert('RGB')
    (im_width, im_height) = image.size
    pixels = np.array(image.getdata())
    return pixels.reshape((im_height, im_width, 3)).astype(np.uint8)
# Run the computation in the graph.
detection_graph = tf.get_default_graph()

# Look the graph tensors up once; they do not change between images.
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
# All detection boxes.
boxes_tensor = detection_graph.get_tensor_by_name('detection_boxes:0')
# Confidence of each detection.
scores_tensor = detection_graph.get_tensor_by_name('detection_scores:0')
# Class of each box.
classes_tensor = detection_graph.get_tensor_by_name('detection_classes:0')
# Number of detections.
num_detections_tensor = detection_graph.get_tensor_by_name('num_detections:0')
fetches = [boxes_tensor, scores_tensor, classes_tensor, num_detections_tensor]

with tf.Session(graph=detection_graph) as sess:
    for image_path in TEST_IMAGES_PATHS:
        print(image_path)
        # Read the picture and turn it into an array; the file is closed
        # as soon as the pixel data has been copied.
        with Image.open(image_path) as image:
            image_np = load_image_into_numpy_array(image)
        # Add a leading dimension before feeding the array to image_tensor.
        image_np_expanded = np.expand_dims(image_np, axis=0)
        # Run the detection.
        (boxes, scores, classes, num_detections) = sess.run(
            fetches, feed_dict={image_tensor: image_np_expanded})
        # Print the detection results.
        print(num_detections)
        print(boxes)
        print(classes)
        print(scores)
        # Produce the visualisation from the detection results.
        vis_utils.visualize_boxes_and_labels_on_image_array(
            image_np,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=8
        )
        # Display.
        plt.figure(figsize=IMAGE_SIZE)
        plt.imshow(image_np)
        plt.show()
恩,複習+鞏固!
sweet~