VOC資料集與COCO資料集
說明:以下各段程式碼都是完整的,但其中的路徑並非出自同一個專案,請根據自己的情況修改,僅供參考!個人筆記,一起學習!!
VOC2007:包含9963張標註過的圖片, 由train/val/test三部分組成, 共標註出24,640個物體。 VOC2007的test資料label已經公佈, 之後年份的test資料則沒有公佈label(只有圖片,沒有label)。
VOC2012:對於檢測任務,VOC2012的trainval/test包含08-11年的所有對應圖片。 trainval有11540張圖片共27450個物體。 對於分割任務, VOC2012的trainval包含07-11年的所有對應圖片, test只包含08-11。trainval有 2913張圖片共6929個物體。
Person: person
Animal: bird, cat, cow, dog, horse, sheep
Vehicle: aeroplane, bicycle, boat, bus, car, motorbike, train
Indoor: bottle, chair, dining table, potted plant, sofa, tv/monitor
voc資料集格式:
data
|-- Annotations
|   |-- all xml files       #存放xml檔案,與JPEGImages圖片一一對應
|-- JPEGImages
|   |-- all your samples    #所有你的資料集圖片
|-- ImageSets
|   |-- train.txt、val.txt、test.txt    #存放資料集分成的txt檔案,每一行包含圖片的名稱。
讀取ImageSets資料夾下的txt檔案,生成帶標註的txt檔案(data_train.txt、data_val.txt、data_test.txt):voc_annotation.py
import xml.etree.ElementTree as ET
from os import getcwd
# (output-file prefix, split name) pairs: the driver loop reads
# ImageSets/<split>.txt and writes "<prefix>_<split>.txt" for each pair.
sets = [
    ("data", "train"),
    ("data", "val"),
    ("data", "test"),
]
# Class names as they appear in the XML <name> tags; a name's position in this
# list is the class id written to the output. Adjust to your own dataset.
classes = [
    "jyz",
    "xcxj",
    "fzc",
    "nc",
    "jyz_gz",
    "fzc_gz",
]
def convert_annotation(year, image_id, list_file):
    """Append the boxes of one image to the annotation line being written.

    Parses ``./Annotations/<image_id>.xml`` and, for every ``<object>`` whose
    class is listed in the module-level ``classes`` and which is not flagged
    as difficult, writes `` xmin,ymin,xmax,ymax,class_id`` to *list_file*.

    Args:
        year: Unused here; kept so existing callers keep working.
        image_id: Annotation file name without the ``.xml`` extension.
        list_file: Writable text file; the caller has already written the
            image path and writes the trailing newline afterwards.
    """
    # The context manager closes the XML file even if parsing fails.
    with open('./Annotations/%s.xml' % (image_id), encoding='utf-8') as in_file:
        root = ET.parse(in_file).getroot()

    for obj in root.iter('object'):
        # A missing <difficult> tag is treated as "not difficult".
        difficult_node = obj.find('difficult')
        difficult = 0 if difficult_node is None else difficult_node.text
        cls = obj.find('name').text
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = tuple(
            int(xmlbox.find(tag).text)
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        list_file.write(" " + ",".join(str(a) for a in b) + ',' + str(cls_id))
import os

# Absolute working directory; the image paths written below are rooted here.
wd = getcwd()
for year, image_set in sets:
    # Build paths with os.path.join instead of a literal "\" so the script is
    # not tied to Windows and contains no invalid escape sequences.
    ids_path = os.path.join('.', 'ImageSets', '%s.txt' % (image_set))
    with open(ids_path) as ids_file:
        image_ids = ids_file.read().strip().split()

    # One output line per image: "<image path> box,box,..." followed by "\n".
    with open('%s_%s.txt' % (year, image_set), 'w') as list_file:
        for image_id in image_ids:
            list_file.write(os.path.join(wd, 'JPEGImages', '%s.jpg' % (image_id)))
            convert_annotation(year, image_id, list_file)
            list_file.write('\n')
voc_label.py:執行後會在data/labels資料夾下面為每張圖片生成對應的txt標籤(每行為 class x y w h,數值皆已歸一化),並在data下生成三個txt檔案(train.txt、test.txt、val.txt,內容為圖片路徑)。上一段voc_annotation.py所生成的txt檔案,則對應yolov4所需的txt檔案格式:(Image_path xmin0,ymin0,xmax0,ymax0,class0 xmin1,ymin1,xmax1,ymax1,class1 …)。
# xml解析包
import xml.etree.ElementTree as ET
import pickle
import os
# os.listdir() 方法用於返回指定的資料夾包含的檔案或資料夾的名字的列表
from os import listdir, getcwd
from os.path import join
# Dataset splits to process; each one is read from ./ImageSets/<split>.txt.
sets = [
    "train",
    "test",
    "val",
]
# Class names as they appear in the XML <name> tags; the list index is the
# class id written to the label files.
classes = [
    "jyz",
    "xcxj",
    "fzc",
    "nc",
    "jyz_gz",
    "fzc_gz",
]
def convert(size, box):
    """Normalise a pixel-space box to centre/size fractions of the image.

    Args:
        size: ``(image_width, image_height)`` in pixels.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels -- note that both x
            values come first, then both y values.

    Returns:
        ``(x_center, y_center, width, height)``; the x values are divided by
        the image width and the y values by the image height.
    """
    xmin, xmax, ymin, ymax = box[0], box[1], box[2], box[3]
    inv_w = 1. / size[0]
    inv_h = 1. / size[1]

    center_x = (xmin + xmax) / 2.0
    center_y = (ymin + ymax) / 2.0
    box_w = xmax - xmin
    box_h = ymax - ymin

    return (center_x * inv_w, center_y * inv_h, box_w * inv_w, box_h * inv_h)
def convert_annotation(image_id):
    """Convert one VOC XML annotation into a label file with normalised boxes.

    Reads ``./Annotations/<image_id>.xml`` and writes
    ``./labels/<image_id>.txt`` with one ``<class_id> <x> <y> <w> <h>`` line
    per kept object; the four numbers are whatever ``convert`` returns for the
    image size and the object's box. Objects whose class is not in the
    module-level ``classes`` list, or that are flagged difficult, are skipped.

    Args:
        image_id: File name shared by the XML and label file, without extension.
    """
    # Parse first and close the XML straight away; the label file is only
    # created once the annotation has been read successfully.
    with open('./Annotations/%s.xml' % (image_id), encoding='utf-8') as in_file:
        root = ET.parse(in_file).getroot()

    # Image size in pixels, needed to normalise the boxes.
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    with open('./labels/%s.txt' % (image_id), 'w', encoding='utf-8') as out_file:
        for obj in root.iter('object'):
            # A missing <difficult> tag is treated as "not difficult".
            difficult_node = obj.find('difficult')
            difficult = 0 if difficult_node is None else difficult_node.text
            cls = obj.find('name').text
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # convert() expects the order (xmin, xmax, ymin, ymax).
            b = tuple(
                float(xmlbox.find(tag).text)
                for tag in ('xmin', 'xmax', 'ymin', 'ymax')
            )
            print(image_id, cls, b)
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join(str(a) for a in bb) + '\n')
# Current working directory, printed for reference only.
wd = getcwd()
print(wd)

# convert_annotation writes into ./labels/, so make sure it exists once up
# front instead of re-checking on every split.
os.makedirs('./labels/', exist_ok=True)

for image_set in sets:
    # For every split this loop does two things:
    #   1. writes the path of each image to ./<split>.txt, one per line;
    #   2. converts each image's XML annotation into ./labels/<image_id>.txt.

    # ./ImageSets/<split>.txt holds the image ids, separated by whitespace.
    with open('./ImageSets/%s.txt' % (image_set)) as ids_file:
        image_ids = ids_file.read().strip().split()

    with open('./%s.txt' % (image_set), 'w') as list_file:
        for image_id in image_ids:
            list_file.write('./images/%s.jpg\n' % (image_id))
            convert_annotation(image_id)
COCO資料集是一個大型的、豐富的物體檢測、分割和字幕資料集。這個資料集以scene understanding為目標,主要從複雜的日常場景中擷取,影象中的目標通過精確的segmentation進行位置的標定。影象包括91類目標、328,000張影像和2,500,000個label。它是目前為止具有語義分割標註的最大資料集,提供的類別有80 類,有超過33 萬張圖片,其中20 萬張有標註,整個資料集中個體的數目超過150 萬個。
coco資料集格式:json檔案
coco資料集格式轉txt檔案:coco.py
import sys
sys.path.append("..")
import xml.etree.ElementTree as ET
import config.yolov4_config as cfg
import os
from tqdm import tqdm
def parse_voc_annotation(
    data_path, file_type, anno_path, use_difficult_bbox=False
):
    """Append one annotation line per image of a VOC-style split to *anno_path*.

    Image ids are read from ``<data_path>/ImageSets/<file_type>.txt``. For each
    id the matching ``Annotations/<id>.xml`` is parsed and a line of the form
    ``<image_path> xmin,ymin,xmax,ymax,class_id ...`` is appended. Images for
    which no box is kept are not written at all.

    Args:
        data_path: Dataset root containing ``ImageSets``, ``JPEGImages`` and
            ``Annotations``.
        file_type: Split name, i.e. the stem of the file inside ``ImageSets``.
        anno_path: Output text file; opened in append mode.
        use_difficult_bbox: Keep objects flagged ``<difficult>1</difficult>``
            when true; they are dropped by default.

    Returns:
        The number of image ids listed in the split file (which can be larger
        than the number of lines written).

    Raises:
        ValueError: If an object's lower-cased name is not in
            ``cfg.COCO_DATA["CLASSES"]``.
    """
    classes = cfg.COCO_DATA["CLASSES"]
    img_inds_file = os.path.join(
        data_path, "ImageSets", file_type + ".txt"
    )
    with open(img_inds_file, "r") as f:
        # Ignore blank lines so they are not mistaken for an image id.
        image_ids = [line.strip() for line in f if line.strip()]

    with open(anno_path, "a") as f:
        for image_id in tqdm(image_ids):
            image_path = os.path.join(
                data_path, "JPEGImages", image_id + ".jpg"
            )
            label_path = os.path.join(
                data_path, "Annotations", image_id + ".xml"
            )
            root = ET.parse(label_path).getroot()

            boxes = []
            for obj in root.findall("object"):
                # difficult: 0 = easy to recognise, 1 = hard. A missing or
                # empty tag counts as 0.
                difficult_node = obj.find("difficult")
                if difficult_node is None or difficult_node.text is None:
                    difficult = "0"
                else:
                    difficult = difficult_node.text.strip()
                if (not use_difficult_bbox) and int(difficult) == 1:
                    continue

                bbox = obj.find("bndbox")
                class_id = classes.index(obj.find("name").text.lower().strip())
                coords = [
                    bbox.find(tag).text.strip()
                    for tag in ("xmin", "ymin", "xmax", "ymax")
                ]
                boxes.append(",".join(coords + [str(class_id)]))

            # Only emit the line when at least one box survived the filter, so
            # the output never contains an image path without any box.
            if boxes:
                f.write(" ".join([image_path] + boxes) + "\n")
    return len(image_ids)
if __name__ == "__main__":
    # Two source trees, both laid out as VOCdevkit/VOC2007; adjust the paths
    # to wherever your data lives.
    train_data_path_2007 = os.path.join(
        cfg.DATA_PATH, "COCO2017_train", "VOCdevkit", "VOC2007"
    )
    train_data_path_2012 = os.path.join(
        cfg.DATA_PATH, "COCO2017_val", "VOCdevkit", "VOC2007"
    )
    # Output file name; change as needed.
    train_annotation_path = os.path.join("../data", "train_annotation.txt")

    # parse_voc_annotation appends, so start from a clean file.
    if os.path.exists(train_annotation_path):
        os.remove(train_annotation_path)

    # Process both trees in order and add up the image counts they report.
    len_train = 0
    for split_root in (train_data_path_2007, train_data_path_2012):
        len_train = len_train + parse_voc_annotation(
            split_root,
            "trainval",
            train_annotation_path,
            use_difficult_bbox=False,
        )

    summary = "The number of images for train and test are :train : {0}".format(
        len_train
    )
    print(summary)
COCO資料集轉VOC資料集:
from pycocotools.coco import COCO
import os
import shutil
from tqdm import tqdm
import skimage.io as io
import matplotlib.pyplot as plt
import cv2
from PIL import Image, ImageDraw
# Root folder that receives the COCO -> VOC conversion results.
savepath = "cocodata/xml/"
img_dir = savepath + "images/"
anno_dir = savepath + "Annotations/"
# COCO splits to convert; each one is looked up as
# annotations/instances_<split>.json under dataDir.
datasets_list = ["train2017", "val2017", "test2017"]
classes_names = [
    # add the category names you want to export
]
# Folder that holds the COCO annotations/ directory and the per-split image
# folders; change it to your own dataset location.
# Raw string: the Windows path is full of backslashes that a normal literal
# would treat as (invalid) escape sequences. The value itself is unchanged.
dataDir = r"E:\code\object_detection\yolov3-other-master\YOLOV3-master\data/"
# Opening part of a VOC annotation file. Placeholders, in order: file name
# (%s), image width, image height and channel count (%d each).
headstr = (
    "<annotation>\n"
    "<folder>VOC</folder>\n"
    "<filename>%s</filename>\n"
    "<source>\n"
    "<database>My Database</database>\n"
    "<annotation>COCO</annotation>\n"
    "<image>flickr</image>\n"
    "<flickrid>NULL</flickrid>\n"
    "</source>\n"
    "<owner>\n"
    "<flickrid>NULL</flickrid>\n"
    "<name>company</name>\n"
    "</owner>\n"
    "<size>\n"
    "<width>%d</width>\n"
    "<height>%d</height>\n"
    "<depth>%d</depth>\n"
    "</size>\n"
    "<segmented>0</segmented>\n"
)
# One <object> element. Placeholders, in order: class name (%s), then
# xmin, ymin, xmax, ymax (%d each).
objstr = (
    "<object>\n"
    "<name>%s</name>\n"
    "<pose>Unspecified</pose>\n"
    "<truncated>0</truncated>\n"
    "<difficult>0</difficult>\n"
    "<bndbox>\n"
    "<xmin>%d</xmin>\n"
    "<ymin>%d</ymin>\n"
    "<xmax>%d</xmax>\n"
    "<ymax>%d</ymax>\n"
    "</bndbox>\n"
    "</object>\n"
)
# Closing tag that ends the annotation file.
tailstr = "</annotation>\n"
def mkr(path):
    """Create *path* as an empty directory, discarding any previous contents.

    An existing directory at *path* is removed first. Missing parent
    directories are created too, so a nested path such as
    ``cocodata/xml/images/`` works on a fresh checkout.

    Args:
        path: Directory to (re)create.
    """
    if os.path.exists(path):
        shutil.rmtree(path)
    # makedirs rather than mkdir: mkdir raises when the parent is missing.
    os.makedirs(path)
# Start every run with fresh, empty output folders: images first, then the
# XML annotations.
for _out_dir in (img_dir, anno_dir):
    mkr(_out_dir)
def id2name(coco):
    """Build a ``{category id: category name}`` lookup.

    Args:
        coco: Object whose ``dataset["categories"]`` is an iterable of
            mappings with ``"id"`` and ``"name"`` keys.

    Returns:
        A dict from each category's id to its name.
    """
    return {
        category["id"]: category["name"]
        for category in coco.dataset["categories"]
    }
def write_xml(anno_path, head, objs, tail):
    """Write one annotation file: *head*, one object element per entry, *tail*.

    Args:
        anno_path: Path of the file to create or overwrite.
        head: Text written first.
        objs: Sequence of ``[name, xmin, ymin, xmax, ymax]`` entries; each one
            is rendered through the module-level ``objstr`` template.
        tail: Text written last.
    """
    # The context manager guarantees the file is flushed and closed, also when
    # formatting one of the entries raises.
    with open(anno_path, "w") as f:
        f.write(head)
        for obj in objs:
            f.write(objstr % (obj[0], obj[1], obj[2], obj[3], obj[4]))
        f.write(tail)
def save_annotations_and_imgs(coco, dataset, filename, objs):
    """Write the VOC XML annotation for one COCO image.

    The image is loaded only to read its width, height and channel count for
    the XML header. Unreadable images and single-channel images are skipped.

    Args:
        coco: Unused here; kept so existing callers keep working.
        dataset: Name of the split folder under ``dataDir`` holding the image.
        filename: Image file name, e.g. ``COCO_train2014_000000196610.jpg``.
        objs: ``[class_name, xmin, ymin, xmax, ymax]`` entries for the image.
    """
    # Replace the real extension with ".xml"; cutting off a fixed three
    # characters would mangle names such as "x.jpeg".
    anno_path = anno_dir + os.path.splitext(filename)[0] + ".xml"
    img_path = dataDir + dataset + "/" + filename

    img = cv2.imread(img_path)
    # cv2.imread signals a missing or undecodable file by returning None
    # instead of raising.
    if img is None:
        print("skip unreadable image: " + img_path)
        return
    # Not a colour image: nothing is written for it.
    if img.ndim < 3 or img.shape[2] == 1:
        return

    # NOTE: despite the function name, the image itself is not copied into
    # img_dir; only the XML annotation is produced.
    head = headstr % (filename, img.shape[1], img.shape[0], img.shape[2])
    write_xml(anno_path, head, objs, tailstr)
def showimg(coco, dataset, img, classes, cls_id, show=True):
    """Collect the corner-format boxes of one COCO image, optionally showing it.

    Args:
        coco: COCO API object used to look up the image's annotations.
        dataset: Name of the split folder under ``dataDir`` holding the image.
        img: Image record providing ``"file_name"`` and ``"id"``.
        classes: Lookup from category id to category name.
        cls_id: Category id(s) passed to ``coco.getAnnIds`` as ``catIds``.
        show: Display the image with the drawn boxes via matplotlib when true.

    Returns:
        A list of ``[class_name, xmin, ymin, xmax, ymax]`` entries, one for
        every annotation whose class is in ``classes_names`` and that has a
        ``"bbox"``.
    """
    image = Image.open("%s/%s/%s" % (dataDir, dataset, img["file_name"]))

    # Fetch the annotation records that belong to this image.
    anns = coco.loadAnns(
        coco.getAnnIds(imgIds=img["id"], catIds=cls_id, iscrowd=None)
    )

    objs = []
    for ann in anns:
        class_name = classes[ann["category_id"]]
        if class_name not in classes_names or "bbox" not in ann:
            continue

        # The bbox is read as [x, y, width, height] and turned into corners.
        bbox = ann["bbox"]
        xmin, ymin = int(bbox[0]), int(bbox[1])
        # Non-positive top-left coordinates are nudged up by one.
        xmin = xmin + 1 if xmin <= 0 else xmin
        ymin = ymin + 1 if ymin <= 0 else ymin
        xmax = int(bbox[2] + bbox[0])
        ymax = int(bbox[3] + bbox[1])

        objs.append([class_name, xmin, ymin, xmax, ymax])
        ImageDraw.Draw(image).rectangle([xmin, ymin, xmax, ymax])

    if show:
        plt.figure()
        plt.axis("off")
        plt.imshow(image)
        plt.show()
    return objs
if __name__ == "__main__":
    for dataset in datasets_list:
        # e.g. <dataDir>/annotations/instances_train2017.json -- adjust to
        # your own layout.
        annFile = "{}/annotations/instances_{}.json".format(dataDir, dataset)
        # A split without an instances_<split>.json file is skipped with a
        # message instead of aborting the whole run.
        if not os.path.isfile(annFile):
            print("skip {}: annotation file {} not found".format(dataset, annFile))
            continue

        # Loading prints the pycocotools progress messages
        # ("loading annotations into memory...", "index created!").
        coco = COCO(annFile)

        # Category id -> name for every category in this annotation file.
        classes = id2name(coco)
        # Ids of the categories we want to export.
        classes_ids = coco.getCatIds(catNms=classes_names)

        # An image containing several wanted classes is listed once per class.
        # Its XML always contains the boxes of all wanted classes, so each
        # image only needs to be processed once per split.
        img_ids = set()
        for cls in classes_names:
            cls_id = coco.getCatIds(catNms=[cls])
            img_ids.update(coco.getImgIds(catIds=cls_id))

        for imgId in tqdm(sorted(img_ids)):
            img = coco.loadImgs(imgId)[0]
            filename = img["file_name"]
            objs = showimg(
                coco, dataset, img, classes, classes_ids, show=False
            )
            save_annotations_and_imgs(coco, dataset, filename, objs)
VOC轉COCO資料集:xml_to_json.py
# coding:utf-8
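# Before running this script:
# pip install numpy  (the XML parsing below uses the standard-library xml.etree, so lxml is not needed)
# Put all the images and XML files into the folder named by xml_dir, and place that folder in the current directory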
import os
import glob
import json
import shutil
import numpy as np
import xml.etree.ElementTree as ET
# Id given to the first bounding-box annotation; convert() counts up from here.
START_BOUNDING_BOX_ID = 1
# NOTE(review): not referenced by any code visible in this file -- presumably
# an output folder; confirm before relying on it.
save_path = "."
def get(root, name):
    """Return the list of elements under *root* that match *name*.

    Thin wrapper around ``root.findall(name)``; the list is empty when
    nothing matches.
    """
    matches = root.findall(name)
    return matches
def get_and_check(root, name, length):
    """Look up *name* under *root* and validate how many matches were found.

    Args:
        root: Element to search in.
        name: Tag/path handed to ``get``.
        length: Expected number of matches; a falsy value disables the count
            check.

    Returns:
        The single matching element when ``length == 1``, otherwise the list
        of matches.

    Raises:
        NotImplementedError: If nothing matches, or if *length* is truthy and
            the number of matches differs from it.
    """
    found = get(root, name)
    count = len(found)
    if count == 0:
        raise NotImplementedError('Can not find %s in %s.' % (name, root.tag))
    if length and count != length:
        raise NotImplementedError('The size of %s is supposed to be %d, but is %d.' % (name, length, count))
    return found[0] if length == 1 else found
def convert(xml_list, json_file):
    """Convert VOC XML annotation files into one COCO-style JSON file.

    Relies on two module-level settings that must exist before the call:
    ``pre_define_categories`` (class name -> category id) and
    ``only_care_pre_define_categories`` (when true, objects of any other
    class are skipped; when false, such classes get a new id on the fly).

    Args:
        xml_list: Paths of the XML files; the position in this sequence
            determines each image id.
        json_file: Path of the JSON file to write.

    Raises:
        NotImplementedError: From ``get_and_check`` when a required tag is
            missing or occurs an unexpected number of times.
        AssertionError: If an image is marked as segmented, or a box has
            ``xmax <= xmin`` or ``ymax <= ymin``.
    """
    json_dict = {"images": [], "type": "instances", "annotations": [], "categories": []}
    categories = pre_define_categories.copy()
    bnd_id = START_BOUNDING_BOX_ID
    # Number of objects seen per class name, including skipped classes.
    all_categories = {}
    for index, line in enumerate(xml_list):
        xml_f = line
        root = ET.parse(xml_f).getroot()

        # The image is assumed to share the XML's base name with a .jpg suffix.
        filename = os.path.splitext(os.path.basename(xml_f))[0] + ".jpg"
        image_id = 20190000001 + index
        size = get_and_check(root, 'size', 1)
        width = int(get_and_check(size, 'width', 1).text)
        height = int(get_and_check(size, 'height', 1).text)
        image = {'file_name': filename, 'height': height, 'width': width, 'id': image_id}
        json_dict['images'].append(image)

        # Segmentation is not supported. Raised explicitly (still an
        # AssertionError) so the check also runs under "python -O".
        segmented = get_and_check(root, 'segmented', 1).text
        if segmented != '0':
            raise AssertionError("segmented must be '0', {}".format(line))

        for obj in get(root, 'object'):
            category = get_and_check(obj, 'name', 1).text
            all_categories[category] = all_categories.get(category, 0) + 1
            if category not in categories:
                if only_care_pre_define_categories:
                    continue
                # Next free id: one above the largest id in use, so it cannot
                # collide when the predefined ids are not contiguous.
                new_id = max(categories.values(), default=0) + 1
                print(
                    "[warning] category '{}' not in 'pre_define_categories'({}), create new id: {} automatically".format(
                        category, pre_define_categories, new_id))
                categories[category] = new_id
            category_id = categories[category]

            bndbox = get_and_check(obj, 'bndbox', 1)
            xmin = int(float(get_and_check(bndbox, 'xmin', 1).text))
            ymin = int(float(get_and_check(bndbox, 'ymin', 1).text))
            xmax = int(float(get_and_check(bndbox, 'xmax', 1).text))
            ymax = int(float(get_and_check(bndbox, 'ymax', 1).text))
            if not xmax > xmin:
                raise AssertionError("xmax <= xmin, {}".format(line))
            if not ymax > ymin:
                raise AssertionError("ymax <= ymin, {}".format(line))

            # COCO boxes are stored as [x, y, width, height].
            o_width = abs(xmax - xmin)
            o_height = abs(ymax - ymin)
            ann = {'area': o_width * o_height, 'iscrowd': 0, 'image_id':
                   image_id, 'bbox': [xmin, ymin, o_width, o_height],
                   'category_id': category_id, 'id': bnd_id, 'ignore': 0,
                   'segmentation': []}
            json_dict['annotations'].append(ann)
            bnd_id = bnd_id + 1

    # NOTE(review): every category is written with supercategory 'food',
    # whatever the class is -- confirm this is intended for your dataset.
    for cate, cid in categories.items():
        cat = {'supercategory': 'food', 'id': cid, 'name': cate}
        json_dict['categories'].append(cat)

    # Serialise before opening the file so a serialisation error cannot leave
    # a truncated output behind; the context manager closes the file.
    json_str = json.dumps(json_dict)
    with open(json_file, 'w') as json_fp:
        json_fp.write(json_str)

    print("------------create {} done--------------".format(json_file))
    print("find {} categories: {} -->>> your pre_define_categories {}: {}".format(len(all_categories),
                                                                                  all_categories.keys(),
                                                                                  len(pre_define_categories),
                                                                                  pre_define_categories.keys()))
    print("category: id --> {}".format(categories))
    print(categories.keys())
    print(categories.values())
if __name__ == '__main__':
    # Define your own classes; category ids are assigned 1..N in list order.
    classes = ['RBC']
    pre_define_categories = {
        cls: idx for idx, cls in enumerate(classes, start=1)
    }
    # To choose the ids yourself, assign the mapping directly instead, e.g.
    # {'a1': 1, 'a3': 2, 'a6': 3, 'a9': 4, "a10": 5}.
    only_care_pre_define_categories = True  # or False

    # Output JSON files, one per split.
    save_json_train = 'train.json'
    save_json_val = 'val.json'
    save_json_test = 'test.json'

    # Folder that holds the source XML files.
    xml_dir = "./Annotations1"
    xml_list = np.sort(glob.glob(xml_dir + "/*.xml"))

    # Shuffle with a fixed seed so the split is the same on every run.
    np.random.seed(100)
    np.random.shuffle(xml_list)

    # Split the shuffled list by ratio; whatever is left after train and val
    # becomes the test set.
    train_ratio = 0.8
    val_ratio = 0.1
    total = len(xml_list)
    train_num = int(total * train_ratio)
    val_num = int(total * val_ratio)
    val_end = train_num + val_num

    # Convert each split, producing train/val/test JSON files in turn.
    splits = (
        (xml_list[:train_num], save_json_train),
        (xml_list[train_num:val_end], save_json_val),
        (xml_list[val_end:], save_json_test),
    )
    for split_files, split_json in splits:
        convert(split_files, split_json)