1. 程式人生 > 其它 >VOC資料集與COCO資料集

VOC資料集與COCO資料集

技術標籤:深度學習

說明:以下程式碼全部為完整的,但是其中路徑不是一個專案,可根據自己情況修改,僅供參考!個人筆記,一起學習!!
VOC2007:包含9963張標註過的圖片,由train/val/test三部分組成,共標註出24,640個物體。VOC2007的test資料label已經公佈,之後版本的test資料則沒有公佈label(只有圖片,沒有label)。

VOC2012:對於檢測任務,VOC2012的trainval/test包含08-11年的所有對應圖片。 trainval有11540張圖片共27450個物體。 對於分割任務, VOC2012的trainval包含07-11年的所有對應圖片, test只包含08-11。trainval有 2913張圖片共6929個物體。

這些物體一共分為20類:

Person: person
Animal: bird, cat, cow, dog, horse, sheep
Vehicle: aeroplane, bicycle, boat, bus, car, motorbike, train
Indoor: bottle, chair, dining table, potted plant, sofa, tv/monitor

voc資料集格式:

  data
    |-- Annotations
    		|-- all xml files #存放xml檔案,與JPEGImages圖片一一對應
    |-- JPEGImages
    		|-- all your samples #所有你的資料集圖片
    |-- ImageSets
    		|-- 。。。。#存放資料集分成的txt檔案,每一行包含圖片的名稱。

根據ImageSets資料夾下的txt檔案(每行一個圖片名稱)生成帶標註的列表檔案(data_train.txt、data_val.txt、data_test.txt):voc_annotation.py

import xml.etree.ElementTree as ET
from os import getcwd

import os

# (prefix, split) pairs: for each pair "./ImageSets/<split>.txt" is read and
# "<prefix>_<split>.txt" is written.
sets = [('data', 'train'), ('data', 'val'), ('data', 'test')]
# Class names in label order; change these to match your own dataset.
classes = ["jyz", "xcxj", "fzc", "nc", "jyz_gz", "fzc_gz"]


def convert_annotation(year, image_id, list_file):
    """Append the boxes of one image to an already opened list file.

    Reads ``./Annotations/<image_id>.xml`` and, for every ``<object>`` whose
    name is in ``classes`` and whose ``difficult`` flag is not 1, writes
    ``" xmin,ymin,xmax,ymax,class_index"`` to ``list_file``.

    Args:
        year: Unused; kept so existing calls keep working.
        image_id: Annotation file name without the ``.xml`` extension.
        list_file: Writable text file object that receives the boxes.
    """
    # The path is relative to the current working directory.
    with open('./Annotations/%s.xml' % image_id, encoding='utf-8') as in_file:
        root = ET.parse(in_file).getroot()

    for obj in root.iter('object'):
        # A missing <difficult> tag is treated as "not difficult".
        difficult_node = obj.find('difficult')
        difficult = 0 if difficult_node is None else difficult_node.text
        cls = obj.find('name').text
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        # int(float(...)) also accepts coordinates stored as "12.0".
        b = tuple(
            int(float(xmlbox.find(tag).text))
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        list_file.write(" " + ",".join(str(a) for a in b) + ',' + str(cls_id))


if __name__ == '__main__':
    wd = getcwd()
    for year, image_set in sets:
        # os.path.join keeps the paths valid on every platform (the original
        # hard-coded Windows backslashes).
        split_path = os.path.join('.', 'ImageSets', '%s.txt' % image_set)
        with open(split_path) as id_file:
            image_ids = id_file.read().strip().split()
        with open('%s_%s.txt' % (year, image_set), 'w') as list_file:
            for image_id in image_ids:
                # One line per image: absolute image path followed by its boxes.
                list_file.write(
                    os.path.join(wd, 'JPEGImages', '%s.jpg' % image_id)
                )
                convert_annotation(year, image_id, list_file)
                list_file.write('\n')

voc_label.py:執行後在data/labels資料夾下面生成對應的txt標籤(每行格式:class x y w h,數值已按圖片寬高歸一化),並在data下生成train、test、val三個txt檔案(每行一個圖片路徑)。yolov4所需的txt檔案格式則為:(Image_path xmin0,ymin0,xmax0,ymax0,class0 xmin1,ymin1,xmax1,ymax1,class1 …)。

# xml解析包
import xml.etree.ElementTree as ET
import pickle
import os
# os.listdir() 方法用於返回指定的資料夾包含的檔案或資料夾的名字的列表
from os import listdir, getcwd
from os.path import join
# Dataset splits; each one has a "./ImageSets/<split>.txt" file listing image ids.
sets = [
    'train',
    'test',
    'val',
]
# Class names; the position in this list is the class index written to the labels.
classes = [
    'jyz',
    'xcxj',
    'fzc',
    'nc',
    'jyz_gz',
    'fzc_gz',
]
# Normalise a pixel box to fractions of the image size.
def convert(size, box):
    """Turn a corner-style pixel box into a normalised centre/size box.

    Args:
        size: ``(image_width, image_height)`` in pixels.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels (note the x-pair comes first).

    Returns:
        ``(x, y, w, h)``: box centre and box size, each multiplied by the
        reciprocal of the matching image dimension.
    """
    inv_w = 1. / size[0]
    inv_h = 1. / size[1]
    xmin, xmax, ymin, ymax = box[0], box[1], box[2], box[3]
    centre_x = (xmin + xmax) / 2.0
    centre_y = (ymin + ymax) / 2.0
    box_w = xmax - xmin
    box_h = ymax - ymin
    return (centre_x * inv_w, centre_y * inv_h, box_w * inv_w, box_h * inv_h)
# image_id is the annotation/image file name without its extension.
def convert_annotation(image_id):
    """Convert one VOC XML annotation into a normalised label file.

    Reads ``./Annotations/<image_id>.xml`` and writes
    ``./labels/<image_id>.txt`` with one line per kept object:
    ``<class_index> <x> <y> <w> <h>``, where the four numbers come from
    ``convert`` (box centre and size divided by the image width/height).
    Objects whose name is not in ``classes`` or whose ``difficult`` flag is 1
    are skipped.

    Args:
        image_id: File name (without extension) shared by the XML and the
            label file.
    """
    # Parse first and close the XML right away; both files are now handled by
    # context managers so nothing stays open (the original never closed them).
    with open('./Annotations/%s.xml' % (image_id), encoding='utf-8') as in_file:
        root = ET.parse(in_file).getroot()

    # Image size in pixels, needed for the normalisation.
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    with open('./labels/%s.txt' % (image_id), 'w', encoding='utf-8') as out_file:
        for obj in root.iter('object'):
            # A missing <difficult> tag is treated as "not difficult" instead
            # of crashing on None.
            difficult_node = obj.find('difficult')
            difficult = 0 if difficult_node is None else difficult_node.text
            cls = obj.find('name').text
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # convert() expects the order (xmin, xmax, ymin, ymax).
            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
            print(image_id, cls, b)
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
# Current working directory, printed for reference only.
wd = getcwd()
print(wd)
# Create the label output folder once, before any conversion starts.
os.makedirs('./labels/', exist_ok=True)
for image_set in sets:
    # For every split:
    # 1. write the path of each image of the split to "./<split>.txt";
    # 2. convert the XML annotation of each image into a label file under "./labels/".
    # The id list lives in "./ImageSets/<split>.txt", whitespace separated.
    with open('./ImageSets/%s.txt' % (image_set)) as id_file:
        image_ids = id_file.read().strip().split()
    # Context managers close both files even if a conversion fails midway.
    with open('./%s.txt' % (image_set), 'w') as list_file:
        for image_id in image_ids:
            list_file.write('./images/%s.jpg\n' % (image_id))
            convert_annotation(image_id)

COCO資料集是一個大型的、豐富的物體檢測,分割和字幕資料集。這個資料集以scene understanding為目標,主要從複雜的日常場景中擷取,影象中的目標通過精確的segmentation進行位置的標定。影象包括91類目標,328,000影像和2,500,000個label。目前為止有語義分割的最大資料集,提供的類別有80 類,有超過33 萬張圖片,其中20 萬張有標註,整個資料集中個體的數目超過150 萬個。
coco資料集格式:json檔案

coco資料集格式轉txt檔案:coco.py

import sys

sys.path.append("..")
import xml.etree.ElementTree as ET
import config.yolov4_config as cfg
import os
from tqdm import tqdm


def parse_voc_annotation(
    data_path, file_type, anno_path, use_difficult_bbox=False
):
    """Append one annotation line per image of a VOC-style folder to a text file.

    The image ids are read from ``<data_path>/ImageSets/<file_type>.txt``.
    For each id the matching XML under ``<data_path>/Annotations`` is parsed
    and a line ``image_path xmin,ymin,xmax,ymax,class_index ...`` is appended
    to ``anno_path``. Images whose XML contains no ``<object>`` element are
    not written.

    Args:
        data_path: Root folder containing ``ImageSets``, ``JPEGImages`` and
            ``Annotations``.
        file_type: Name of the split file (without ``.txt``).
        anno_path: Output text file, opened in append mode.
        use_difficult_bbox: When false, objects flagged ``difficult == 1``
            are left out.

    Returns:
        The number of ids read from the split file.
    """
    classes = cfg.COCO_DATA["CLASSES"]
    split_file = os.path.join(data_path, "ImageSets", file_type + ".txt")
    with open(split_file, "r") as split_fh:
        image_ids = [raw.strip() for raw in split_fh.readlines()]

    with open(anno_path, "a") as out_fh:
        for image_id in tqdm(image_ids):
            # The line starts with the image path; boxes are appended after it.
            fields = [os.path.join(data_path, "JPEGImages", image_id + ".jpg")]
            xml_file = os.path.join(data_path, "Annotations", image_id + ".xml")
            objects = ET.parse(xml_file).getroot().findall("object")
            for obj in objects:
                difficult = obj.find("difficult").text.strip()
                # difficult: 0 = easy to recognise, 1 = hard
                if not use_difficult_bbox and int(difficult) == 1:
                    continue
                bbox = obj.find("bndbox")
                class_id = classes.index(obj.find("name").text.lower().strip())
                corners = [
                    bbox.find(tag).text.strip()
                    for tag in ("xmin", "ymin", "xmax", "ymax")
                ]
                fields.append(",".join(corners + [str(class_id)]))
            if objects:
                out_fh.write(" ".join(fields) + "\n")

    return len(image_ids)


if __name__ == "__main__":
    # The two VOC-style source folders (adjust the paths to your own layout).
    train_data_path_2007, train_data_path_2012 = (
        os.path.join(cfg.DATA_PATH, split_dir, "VOCdevkit", "VOC2007")
        for split_dir in ("COCO2017_train", "COCO2017_val")
    )
    # Output file (adjust the name if needed); removed first because the
    # parser opens it in append mode.
    train_annotation_path = os.path.join("../data", "train_annotation.txt")
    if os.path.exists(train_annotation_path):
        os.remove(train_annotation_path)

    # Parse both folders in order and add up the number of image ids.
    len_train = sum(
        [
            parse_voc_annotation(
                source_dir,
                "trainval",
                train_annotation_path,
                use_difficult_bbox=False,
            )
            for source_dir in (train_data_path_2007, train_data_path_2012)
        ]
    )

    summary = "The number of images for train and test are :train : {0}"
    print(summary.format(len_train))

COCO資料集轉VOC資料集:

from pycocotools.coco import COCO
import os
import shutil
from tqdm import tqdm
import skimage.io as io
import matplotlib.pyplot as plt
import cv2
from PIL import Image, ImageDraw

# Output root for the COCO -> VOC conversion; images and XML files get their
# own sub-folders below it.
savepath = "cocodata/xml/"
img_dir = savepath + "images/"
anno_dir = savepath + "Annotations/"
# Dataset splits to convert; for each one
# "<dataDir>/annotations/instances_<split>.json" is loaded.
datasets_list = ["train2017", "val2017", "test2017"]

classes_names = [
    # add the class names you want to export here
]
# Folder that holds the split image folders and the "annotations" folder;
# change it to your own dataset path.
# Raw string: the backslashes are kept literally instead of relying on
# invalid escape sequences such as "\c" or "\d".
dataDir = r"E:\code\object_detection\yolov3-other-master\YOLOV3-master\data/"

# Header of a VOC XML file; filled in order with: file name, image width,
# image height, channel count.
headstr = """\
<annotation>
    <folder>VOC</folder>
    <filename>%s</filename>
    <source>
        <database>My Database</database>
        <annotation>COCO</annotation>
        <image>flickr</image>
        <flickrid>NULL</flickrid>
    </source>
    <owner>
        <flickrid>NULL</flickrid>
        <name>company</name>
    </owner>
    <size>
        <width>%d</width>
        <height>%d</height>
        <depth>%d</depth>
    </size>
    <segmented>0</segmented>
"""
# One <object> entry; filled in order with: class name, xmin, ymin, xmax, ymax.
objstr = """\
    <object>
        <name>%s</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>%d</xmin>
            <ymin>%d</ymin>
            <xmax>%d</xmax>
            <ymax>%d</ymax>
        </bndbox>
    </object>
"""

# Closing tag written after the last object entry.
tailstr = """\
</annotation>
"""


# Create the directory; if it already exists, delete it first and recreate it.
def mkr(path):
    """Make ``path`` an empty directory.

    An existing directory is removed together with everything inside it
    before being recreated. Missing parent directories are created as well,
    so nested output paths work on a fresh checkout (plain ``os.mkdir``
    raised ``FileNotFoundError`` in that case).

    Args:
        path: Directory to (re)create.
    """
    if os.path.exists(path):
        shutil.rmtree(path)
    os.makedirs(path)

# Start every run with empty output folders for the images and the XML files.
for out_dir in (img_dir, anno_dir):
    mkr(out_dir)

def id2name(coco):
    """Build a lookup from category id to category name.

    Args:
        coco: Object whose ``dataset["categories"]`` is an iterable of
            records with ``"id"`` and ``"name"`` entries.

    Returns:
        A dict mapping each category id to its name.
    """
    return {entry["id"]: entry["name"] for entry in coco.dataset["categories"]}

def write_xml(anno_path, head, objs, tail):
    """Write one annotation file: header, one entry per object, then the tail.

    Args:
        anno_path: Path of the file to create or overwrite.
        head: Text written first.
        objs: Iterable of 5-item sequences; each one is formatted into the
            module-level ``objstr`` template.
        tail: Text written last.
    """
    # The context manager closes the file even if formatting an object fails;
    # the original never closed the handle.
    with open(anno_path, "w") as f:
        f.write(head)
        for obj in objs:
            f.write(objstr % (obj[0], obj[1], obj[2], obj[3], obj[4]))
        f.write(tail)


def save_annotations_and_imgs(coco, dataset, filename, objs):
    """Write the VOC XML annotation for one image.

    The image is read only to obtain its width, height and channel count for
    the XML header. The image itself is not copied to ``img_dir``.

    Args:
        coco: Unused; kept so existing calls keep working.
        dataset: Sub-folder of ``dataDir`` that contains the image.
        filename: Image file name; the XML gets the same name with an
            ``.xml`` extension.
        objs: List of ``[class_name, xmin, ymin, xmax, ymax]`` entries.
    """
    # eg: COCO_train2014_000000196610.jpg --> COCO_train2014_000000196610.xml
    # splitext handles extensions of any length (".jpeg" as well as ".jpg").
    anno_path = anno_dir + os.path.splitext(filename)[0] + ".xml"
    img_path = dataDir + dataset + "/" + filename

    img = cv2.imread(img_path)
    if img is None:
        # imread signals a missing or unreadable file by returning None;
        # skip it instead of crashing on img.shape.
        print("[warning] cannot read image, skipped: " + img_path)
        return
    if img.shape[2] == 1:
        # single-channel image: no annotation is written
        return

    head = headstr % (filename, img.shape[1], img.shape[0], img.shape[2])
    tail = tailstr
    write_xml(anno_path, head, objs, tail)


def showimg(coco, dataset, img, classes, cls_id, show=True):
    """Collect the boxes of the wanted classes for one image, optionally showing them.

    Args:
        coco: Object providing ``getAnnIds`` and ``loadAnns``.
        dataset: Sub-folder of ``dataDir`` that contains the image file.
        img: Image record; its ``"file_name"`` and ``"id"`` entries are read.
        classes: Mapping from category id to category name.
        cls_id: Category id filter passed to ``coco.getAnnIds``.
        show: When true, display the image with the drawn boxes.

    Returns:
        A list of ``[class_name, xmin, ymin, xmax, ymax]`` entries, one per
        annotation whose class name is in ``classes_names`` and that has a
        ``"bbox"`` entry.
    """
    picture = Image.open("%s/%s/%s" % (dataDir, dataset, img["file_name"]))
    # Look up the annotations that belong to this image.
    ann_ids = coco.getAnnIds(imgIds=img["id"], catIds=cls_id, iscrowd=None)

    objs = []
    for ann in coco.loadAnns(ann_ids):
        class_name = classes[ann["category_id"]]
        if class_name not in classes_names:
            continue
        if "bbox" not in ann:
            continue
        # The box is used as [x, y, width, height].
        bbox = ann["bbox"]
        xmin = int(bbox[0])
        ymin = int(bbox[1])
        # Coordinates at or below zero are moved up by one.
        xmin = xmin + 1 if xmin <= 0 else xmin
        ymin = ymin + 1 if ymin <= 0 else ymin
        xmax = int(bbox[2] + bbox[0])
        ymax = int(bbox[3] + bbox[1])
        objs.append([class_name, xmin, ymin, xmax, ymax])
        ImageDraw.Draw(picture).rectangle([xmin, ymin, xmax, ymax])

    if show:
        plt.figure()
        plt.axis("off")
        plt.imshow(picture)
        plt.show()

    return objs


if __name__ == "__main__":
    # Running count of processed (dataset, class) pairs, printed as progress.
    i = 0
    for dataset in datasets_list:
        # e.g. ./COCO/annotations/instances_train2014.json -- adjust to your path
        annFile = "{}/annotations/instances_{}.json".format(dataDir, dataset)

        # Load the annotation file of this split.
        coco = COCO(annFile)
        # category id -> category name
        classes = id2name(coco)
        # Ids of all wanted classes; showimg always receives this full list.
        classes_ids = coco.getCatIds(catNms=classes_names)
        # An image that contains several wanted classes shows up in the id
        # list of each of them. showimg is called with classes_ids rather than
        # the per-class id, so converting the image again would rewrite the
        # same XML; remember what was already done and skip it.
        processed = set()
        for cls in classes_names:
            i += 1
            print(i)
            # Ids of the images that contain this class.
            cls_id = coco.getCatIds(catNms=[cls])
            img_ids = coco.getImgIds(catIds=cls_id)
            for imgId in tqdm(img_ids):
                if imgId in processed:
                    continue
                processed.add(imgId)
                img = coco.loadImgs(imgId)[0]
                filename = img["file_name"]
                objs = showimg(
                    coco, dataset, img, classes, classes_ids, show=False
                )
                save_annotations_and_imgs(coco, dataset, filename, objs)

VOC轉COCO資料集:xml_to_json.py

# coding:utf-8
# 執行前請先做以下工作:
# pip install lxml
# 將所有的圖片及xml檔案存放到xml_dir指定的資料夾下,並將此資料夾放置到當前目錄下

import os
import glob
import json
import shutil
import numpy as np
import xml.etree.ElementTree as ET

# Id given to the first bounding-box annotation of a JSON file; later boxes
# count up from this value.
START_BOUNDING_BOX_ID = 1
# NOTE(review): not referenced by any code visible in this file.
save_path = "."

def get(root, name):
    """Return every element under ``root`` that ``root.findall(name)`` matches."""
    matches = root.findall(name)
    return matches


def get_and_check(root, name, length):
    """Fetch the ``name`` children of ``root`` and validate how many there are.

    Args:
        root: Element to search in.
        name: Tag (or path) handed to ``get``.
        length: Expected number of matches; a falsy value disables the count
            check.

    Returns:
        The single matching element when ``length`` is 1, otherwise the list
        of matches.

    Raises:
        NotImplementedError: If nothing matches, or if ``length`` is truthy
            and the number of matches differs from it.
    """
    found = get(root, name)
    count = len(found)
    if count == 0:
        raise NotImplementedError('Can not find %s in %s.' % (name, root.tag))
    if length and count != length:
        raise NotImplementedError('The size of %s is supposed to be %d, but is %d.' % (name, length, count))
    return found[0] if length == 1 else found


def convert(xml_list, json_file):
    """Convert a list of VOC XML files into one COCO-style JSON file.

    Relies on the module-level ``pre_define_categories`` (name -> id) and
    ``only_care_pre_define_categories`` (when true, objects of unknown
    categories are skipped; otherwise they get the next free id).

    Args:
        xml_list: Paths of the XML annotation files; the position in the list
            determines the image id.
        json_file: Path of the JSON file to write.
    """
    categories = pre_define_categories.copy()
    images = []
    annotations = []
    json_dict = {"images": images, "type": "instances", "annotations": annotations, "categories": []}
    next_box_id = START_BOUNDING_BOX_ID
    # category name -> number of objects seen, including skipped categories
    all_categories = {}

    for index, xml_f in enumerate(xml_list):
        root = ET.parse(xml_f).getroot()

        # The image is assumed to share the XML's base name, with ".jpg".
        filename = os.path.basename(xml_f)[:-4] + ".jpg"
        image_id = 20190000001 + index
        size = get_and_check(root, 'size', 1)
        width = int(get_and_check(size, 'width', 1).text)
        height = int(get_and_check(size, 'height', 1).text)
        images.append({'file_name': filename, 'height': height, 'width': width, 'id': image_id})

        # Segmentation is not supported.
        segmented = get_and_check(root, 'segmented', 1).text
        assert segmented == '0'

        for obj in get(root, 'object'):
            category = get_and_check(obj, 'name', 1).text
            all_categories[category] = all_categories.get(category, 0) + 1
            if category not in categories:
                if only_care_pre_define_categories:
                    continue
                new_id = len(categories) + 1
                print(
                    "[warning] category '{}' not in 'pre_define_categories'({}), create new id: {} automatically".format(
                        category, pre_define_categories, new_id))
                categories[category] = new_id
            category_id = categories[category]

            bndbox = get_and_check(obj, 'bndbox', 1)
            xmin, ymin, xmax, ymax = [
                int(float(get_and_check(bndbox, corner, 1).text))
                for corner in ('xmin', 'ymin', 'xmax', 'ymax')
            ]
            assert (xmax > xmin), "xmax <= xmin, {}".format(xml_f)
            assert (ymax > ymin), "ymax <= ymin, {}".format(xml_f)
            o_width = abs(xmax - xmin)
            o_height = abs(ymax - ymin)
            # COCO boxes are stored as [x, y, width, height].
            annotations.append({
                'area': o_width * o_height,
                'iscrowd': 0,
                'image_id': image_id,
                'bbox': [xmin, ymin, o_width, o_height],
                'category_id': category_id,
                'id': next_box_id,
                'ignore': 0,
                'segmentation': [],
            })
            next_box_id += 1

    json_dict['categories'].extend(
        {'supercategory': 'food', 'id': cid, 'name': cate}
        for cate, cid in categories.items()
    )
    with open(json_file, 'w') as json_fp:
        json_fp.write(json.dumps(json_dict))

    print("------------create {} done--------------".format(json_file))
    print("find {} categories: {} -->>> your pre_define_categories {}: {}".format(len(all_categories),
                                                                                  all_categories.keys(),
                                                                                  len(pre_define_categories),
                                                                                  pre_define_categories.keys()))
    print("category: id --> {}".format(categories))
    print(categories.keys())
    print(categories.values())


if __name__ == '__main__':
    # Your own class names; ids are assigned 1, 2, ... in list order.
    classes = ['RBC']
    pre_define_categories = dict(zip(classes, range(1, len(classes) + 1)))
    # To choose the ids yourself, replace the line above with e.g.:
    # pre_define_categories = {'a1': 1, 'a3': 2, 'a6': 3, 'a9': 4, "a10": 5}
    only_care_pre_define_categories = True  # or False

    # Output JSON files, one per split.
    save_json_train, save_json_val, save_json_test = 'train.json', 'val.json', 'test.json'

    # Folder holding the source XML files.
    xml_dir = "./Annotations1"
    xml_list = np.sort(glob.glob(xml_dir + "/*.xml"))

    # Shuffle with a fixed seed so the split is reproducible.
    np.random.seed(100)
    np.random.shuffle(xml_list)

    # Split the shuffled list: 80% train, 10% val, the remainder test.
    train_ratio, val_ratio = 0.8, 0.1
    total = len(xml_list)
    train_num = int(total * train_ratio)
    val_num = int(total * val_ratio)
    val_end = train_num + val_num

    # Write one JSON file per split (train / val / test).
    splits = (
        (xml_list[:train_num], save_json_train),
        (xml_list[train_num:val_end], save_json_val),
        (xml_list[val_end:], save_json_test),
    )
    for subset, target in splits:
        convert(subset, target)