MSCOCO資料標註詳解
參考:
完整程式碼點選此處
JSON檔案
json檔案主要包含以下幾個欄位:
詳細描述參考 COCO 標註詳解
{
"info": info, # dict
"licenses": [license], # list ,內部是dict
"images": [image], # list ,內部是dict
"annotations": [annotation], # list ,內部是dict
"categories": [category] # list ,內部是dict
}
開啟JSON檔案檢視資料特點
由於JSON檔案太大,很多都是重複定義的,所以只提取一張圖片,儲存成新的JSON檔案,便於觀察。
# -*- coding:utf-8 -*-
from __future__ import print_function
from pycocotools.coco import COCO
import os, sys, zipfile
import urllib.request
import shutil
import numpy as np
import skimage.io as io
import matplotlib.pyplot as plt
import pylab
import json
# Extract the first image of a COCO annotation file, together with all of its
# annotations, into a small JSON file that is easy to inspect by hand.
json_file = './annotations/instances_val2017.json'  # Object Instance annotations
# person_keypoints_val2017.json  # Object Keypoint annotation format
# captions_val2017.json  # Image Caption annotation format

# Context manager so the (large) input file handle is closed after parsing.
with open(json_file, 'r') as src:
    data = json.load(src)

data_2 = {}
data_2['info'] = data['info']
data_2['licenses'] = data['licenses']
data_2['images'] = [data['images'][0]]  # keep only the first image
# The caption annotation file has no "categories" field, so copy it only
# when it is present instead of failing with a KeyError.
if 'categories' in data:
    data_2['categories'] = data['categories']

# Find every object that belongs to the selected image via its image id.
imgID = data_2['images'][0]['id']
annotation = [ann for ann in data['annotations'] if ann['image_id'] == imgID]
data_2['annotations'] = annotation

# Save to a new JSON file; indent=4 makes the output easier to read.
with open('./new_instances_val2017.json', 'w') as dst:
    json.dump(data_2, dst, indent=4)
Object Instance 型別的標註格式
主要有以下幾個欄位:
info
"info": { # 資料集資訊描述
"description": "COCO 2017 Dataset", # 資料集描述
"url": "http://cocodataset.org", # 下載地址
"version": "1.0", # 版本
"year": 2017, # 年份
"contributor": "COCO Consortium", # 提供者
"date_created": "2017/09/01" # 資料建立日期
},
licenses
"licenses": [
{
"url": "http://creativecommons.org/licenses/by-nc-sa/2.0/",
"id": 1,
"name": "Attribution-NonCommercial-ShareAlike License"
},
……
……
],
images
"images": [
{
"license": 4,
"file_name": "000000397133.jpg", # 圖片名
"coco_url": "http://images.cocodataset.org/val2017/000000397133.jpg",# 網路地址路徑
"height": 427, # 高
"width": 640, # 寬
"date_captured": "2013-11-14 17:02:52", # 資料獲取日期
"flickr_url": "http://farm7.staticflickr.com/6116/6255196340_da26cf2c9e_z.jpg",# flickr網路地址
"id": 397133 # 圖片的ID編號(每張圖片ID是唯一的)
},
……
……
],
categories
"categories": [ # 類別描述
{
"supercategory": "person", # 主類別
"id": 1, # 類對應的id (0 預設為背景)
"name": "person" # 子類別
},
{
"supercategory": "vehicle",
"id": 2,
"name": "bicycle"
},
{
"supercategory": "vehicle",
"id": 3,
"name": "car"
},
……
……
],
注: bicycle 與car都屬於vehicle,但兩者又屬於不同的類別。例如:羊(主類別)分為山羊、綿羊、藏羚羊(子類別)等
annotations
"annotations": [
{
"segmentation": [ # 物件的邊界點(邊界多邊形)
[
224.24,297.18,# 第一個點 x,y座標
228.29,297.18, # 第二個點 x,y座標
234.91,298.29,
……
……
225.34,297.55
]
],
"area": 1481.3806499999994, # 區域面積
"iscrowd": 0, # 0 表示單個物件(segmentation 為 polygon 格式),1 表示一組物件(segmentation 為 RLE 格式)
"image_id": 397133, # 對應的圖片ID(與images中的ID對應)
"bbox": [217.62,240.54,38.99,57.75], # 定位邊框 [x,y,w,h]
"category_id": 44, # 類別ID(與categories中的ID對應)
"id": 82445 # 物件ID,因為每一個影象有不止一個物件,所以要對每一個物件編號(每個物件的ID是唯一的)
},
……
……
]
注意,單個的物件(iscrowd=0)可能需要多個polygon來表示,比如這個物件在影象中被擋住了。而iscrowd=1時(將標註一組物件,比如一群人)的segmentation使用的就是RLE格式。
視覺化
現在呼叫cocoapi
顯示剛生成的JSON檔案,並檢查是否有問題。
# -*- coding:utf-8 -*-
from __future__ import print_function
from pycocotools.coco import COCO
import os, sys, zipfile
import urllib.request
import shutil
import numpy as np
import skimage.io as io
import matplotlib.pyplot as plt
import pylab
# Open the single-image annotation file with the COCO API and display the
# image, its instance masks, its keypoints and its captions, to check that
# the generated JSON is usable.
pylab.rcParams['figure.figsize'] = (8.0, 10.0)
annFile = './new_instances_val2017.json'
coco = COCO(annFile)

# Display COCO categories and supercategories.
cats = coco.loadCats(coco.getCatIds())
nms = [cat['name'] for cat in cats]
print('COCO categories: \n{}\n'.format(' '.join(nms)))
nms = set([cat['supercategory'] for cat in cats])
print('COCO supercategories: \n{}'.format(' '.join(nms)))

# imgIds = coco.getImgIds(imgIds = [324158])
imgIds = coco.getImgIds()
img = coco.loadImgs(imgIds[0])[0]
dataDir = '.'
dataType = 'val2017'
I = io.imread('%s/%s/%s' % (dataDir, dataType, img['file_name']))
plt.axis('off')
plt.imshow(I)
plt.show()

# Load and display instance annotations (instance masks).
# catIds = coco.getCatIds(catNms=['person','dog','skateboard']);
# catIds=coco.getCatIds()
# Use the category ids of the annotations that belong to this image.
catIds = [
    ann['category_id']
    for ann in coco.dataset['annotations']
    if ann['image_id'] == imgIds[0]
]
plt.imshow(I)
plt.axis('off')
annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
anns = coco.loadAnns(annIds)
coco.showAnns(anns)
# Show the mask overlay on its own figure before the next overlay is drawn,
# otherwise masks and keypoints end up on the same axes.
plt.show()

# Initialize COCO api for person keypoints annotations.
annFile = '{}/annotations/person_keypoints_{}.json'.format(dataDir, dataType)
coco_kps = COCO(annFile)

# Load and display keypoints annotations (body keypoints).
plt.imshow(I)
plt.axis('off')
annIds = coco_kps.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
anns = coco_kps.loadAnns(annIds)
coco_kps.showAnns(anns)
plt.show()

# Initialize COCO api for caption annotations.
annFile = '{}/annotations/captions_{}.json'.format(dataDir, dataType)
coco_caps = COCO(annFile)

# Load and display caption annotations (text descriptions).
annIds = coco_caps.getAnnIds(imgIds=img['id'])
anns = coco_caps.loadAnns(annIds)
coco_caps.showAnns(anns)
plt.imshow(I)
plt.axis('off')
plt.show()
A man is in a kitchen making pizzas.
Man in apron standing on front of oven with pans and bakeware
A baker is working in the kitchen rolling dough.
A person standing by a stove in a kitchen.
A table with pies being made and a person standing near a wall with pots and pans hanging on the wall.
仿照COCO JSON檔案
仿照COCO的資料格式,將labelme的JSON改造成COCO的JSON
首先是要labelme
做好圖片標註
說明:(類別不一定對,只是為了說明問題)
bobcat-美國短耳貓
plushcat-布偶貓
deerhound-小鹿犬
mainecat-緬因貓
golden-金毛
將labelme的JSON轉成COCO格式JSON
這裡寫一個class實現以下功能,labelme2COCO.py中的部分程式碼如下:
def image(self, data, num):
    """Build the COCO ``images`` entry for one labelme record.

    Args:
        data: Parsed labelme JSON dict; ``data['imageData']`` and
            ``data['imagePath']`` are read.
        num: Index of the record; the COCO image id is ``num + 1``.

    Returns:
        dict with the keys ``height``, ``width``, ``id`` and ``file_name``.

    Side effects:
        Stores the image size on ``self.height`` and ``self.width``.
    """
    # Parse the original image data stored in the record; only its size is
    # needed, so the decoded array is not kept around.
    # Alternatives that read the image from disk instead:
    #   io.imread(data['imagePath'])
    #   cv2.imread(data['imagePath'], 0)
    height, width = utils.img_b64_to_array(data['imageData']).shape[:2]

    image = {}
    image['height'] = height
    image['width'] = width
    image['id'] = num + 1
    # Keep only the last '/'-separated path component.
    image['file_name'] = data['imagePath'].split('/')[-1]

    self.height = height
    self.width = width
    return image
def categorie(self, label):
    """Build a COCO ``categories`` entry for a new label.

    Args:
        label: Indexable pair; ``label[0]`` is used as the supercategory
            and ``label[1]`` as the category name.

    Returns:
        dict with the keys ``supercategory``, ``id`` and ``name``. The id
        is ``len(self.label) + 1`` because id 0 is reserved for background.
    """
    return {
        'supercategory': label[0],
        'id': len(self.label) + 1,  # 0 is the background by default
        'name': label[1],
    }
def annotation(self, points, label, num):
    """Build a COCO ``annotations`` entry for one labelled polygon.

    Args:
        points: Polygon vertices as a sequence of ``[x, y]`` pairs.
        label: Label passed to ``self.getcatid`` to obtain the category id.
        num: Index of the image; the COCO image id is ``num + 1``.

    Returns:
        dict with the keys ``segmentation``, ``iscrowd``, ``image_id``,
        ``bbox``, ``category_id`` and ``id`` (taken from ``self.annID``).
    """
    annotation = {}
    # ndarray.tolist() converts to native Python numbers. list(ndarray)
    # keeps numpy scalars, and numpy integers cannot be serialized by the
    # json module, which breaks saving the result to a JSON file.
    annotation['segmentation'] = [np.asarray(points).flatten().tolist()]
    annotation['iscrowd'] = 0
    annotation['image_id'] = num + 1
    # Convert the box values to plain floats so they are JSON-serializable.
    annotation['bbox'] = list(map(float, self.getbbox(points)))
    annotation['category_id'] = self.getcatid(label)
    annotation['id'] = self.annID
    return annotation
注:這裡只實現images、categories、annotations三個欄位內容,因為只用到這幾個欄位
視覺化資料
這部分是使用COCO的API介面開啟剛才自己生成的JSON檔案,以驗證是否存在問題。
visualization.py
# -*- coding:utf-8 -*-
from __future__ import print_function
from pycocotools.coco import COCO
import os, sys, zipfile
import urllib.request
import shutil
import numpy as np
import skimage.io as io
import matplotlib.pyplot as plt
import pylab
# Open the self-generated COCO-style JSON with the COCO API and display a
# randomly chosen image with its instance masks, to verify the file.
pylab.rcParams['figure.figsize'] = (8.0, 10.0)
annFile = './new.json'
coco = COCO(annFile)

# Display COCO categories and supercategories.
cats = coco.loadCats(coco.getCatIds())
nms = [cat['name'] for cat in cats]
print('COCO categories: \n{}\n'.format(' '.join(nms)))
nms = set([cat['supercategory'] for cat in cats])
print('COCO supercategories: \n{}'.format(' '.join(nms)))

# imgIds = coco.getImgIds(imgIds = [324158])
imgIds = coco.getImgIds()
# NOTE: despite its name, imgId is a random *index* into imgIds, not an id.
imgId = np.random.randint(0, len(imgIds))
img = coco.loadImgs(imgIds[imgId])[0]
dataDir = '.'
# dataType = 'val2017'
# I = io.imread('%s/%s/%s'%(dataDir,dataType,img['file_name']))
I = io.imread('%s/%s' % (dataDir, img['file_name']))
plt.axis('off')
plt.imshow(I)
plt.show()

# Load and display instance annotations (instance masks).
# catIds = coco.getCatIds(catNms=['person','dog','skateboard']);
# catIds=coco.getCatIds()
# Use the category ids of the annotations that belong to the chosen image.
catIds = [
    ann['category_id']
    for ann in coco.dataset['annotations']
    if ann['image_id'] == imgIds[imgId]
]
plt.imshow(I)
plt.axis('off')
annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
anns = coco.loadAnns(annIds)
coco.showAnns(anns)
plt.show()
顯示結果:
Object Keypoint 型別的標註格式
執行指令碼one_image_json.py
得到單張圖片的JSON資訊。
基本上內容與Object Instance的標註格式一樣,不同的地方在於categories、annotations欄位內容不一樣。
主要內容有:
{
"info": {
"description": "COCO 2017 Dataset",
"url": "http://cocodataset.org",
"version": "1.0",
"year": 2017,
"contributor": "COCO Consortium",
"date_created": "2017/09/01"
},
"licenses": [
{
"url": "http://creativecommons.org/licenses/by-nc-sa/2.0/",
"id": 1,
"name": "Attribution-NonCommercial-ShareAlike License"
},
……
……
],
"images": [
{
"license": 4,
"file_name": "000000397133.jpg", # 圖片名
"coco_url": "http://images.cocodataset.org/val2017/000000397133.jpg", # coco 連結地址
"height": 427, # 高
"width": 640, # 寬
"date_captured": "2013-11-14 17:02:52", # 獲取日期
"flickr_url": "http://farm7.staticflickr.com/6116/6255196340_da26cf2c9e_z.jpg", # flickr 連結地址
"id": 397133 # 圖片ID(每張圖片ID唯一)
}
],
"categories": [
{
"supercategory": "person", # 主類
"id": 1, # class id
"name": "person", # 子類(具體類別)
"keypoints": [ # 相比Object Instance多了這個欄位
"nose",
"left_eye",
"right_eye",
"left_ear",
"right_ear",
"left_shoulder",
"right_shoulder",
"left_elbow",
"right_elbow",
"left_wrist",
"right_wrist",
"left_hip",
"right_hip",
"left_knee",
"right_knee",
"left_ankle",
"right_ankle"
],
"skeleton": [ # 骨架
[
16,14
],
[
14,12
],
……
……
[
5,7
]
]
}
],
"annotations": [
{
"segmentation": [
[
446.71,70.66, # 多邊形(物件mask)第一個點 x,y
466.07,72.89,
471.28,78.85,
473.51,88.52,
473.51,98.2,
……
……
443.74,69.92
]
],
"num_keypoints": 13, # 關鍵點數
"area": 17376.91885,
"iscrowd": 0,
"keypoints": [
# v=0 表示這個關鍵點沒有標註(這種情況下x=y=v=0)
# v=1 表示這個關鍵點標註了但是不可見(被遮擋了)
# v=2 表示這個關鍵點標註了同時也可見
433,94,2, # x,y,v
434,90,2,
0,0,0,
443,98,2,
0,0,0,
……
……
],
"image_id": 397133, # 對應的圖片ID
"bbox": [
388.66,69.92,109.41,277.62 # [x,y,w,h] 物件定位框
],
"category_id": 1, # 類別id
"id": 200887 # 物件id(每個物件id都是唯一的,即不能出現重複)
},
……
……
]
}
Image Caption的標註格式
執行指令碼one_image_json.py
得到單張圖片的JSON資訊。
基本上內容與Object Instance的標註格式一樣,不同的地方在於annotations欄位內容不一樣以及沒有categories欄位
{
"info": {
"description": "COCO 2017 Dataset",
"url": "http://cocodataset.org",
"version": "1.0",
"year": 2017,
"contributor": "COCO Consortium",
"date_created": "2017/09/01"
},
"licenses": [
{
"url": "http://creativecommons.org/licenses/by-nc-sa/2.0/",
"id": 1,
"name": "Attribution-NonCommercial-ShareAlike License"
},
……
……
],
"images": [
{
"license": 4,
"file_name": "000000397133.jpg",
"coco_url": "http://images.cocodataset.org/val2017/000000397133.jpg",
"height": 427,
"width": 640,
"date_captured": "2013-11-14 17:02:52",
"flickr_url": "http://farm7.staticflickr.com/6116/6255196340_da26cf2c9e_z.jpg",
"id": 397133
}
],
"annotations": [
{
"image_id": 397133, # 圖片ID(唯一)
"id": 370509, # 物件ID(唯一) (沒有類別ID)
"caption": "A man is in a kitchen making pizzas." # 圖片描述
},
……
……
{
"image_id": 397133,
"id": 375891,
"caption": "A table with pies being made and a person standing near a wall with pots and pans hanging on the wall."
}
]
}
這三種標註的info,licenses,images的內容是一樣的。