COCO資料集轉mask
阿新 • • 發佈:2020-07-02
書接上文,先馬克一下,之後再改
# -*- coding: utf-8 -*-
"""
Created on Wed Jul  1 14:45:07 2020

@author: mhshao

Convert COCO instance annotations into binary mask images.

For every image that contains at least one object of the categories listed in
``classes_names``, a PNG is written in which the pixels covered by those
objects are white and everything else is black.
"""
from pycocotools.coco import COCO
import os
import shutil
from tqdm import tqdm
import matplotlib.pyplot as plt
import cv2
from PIL import Image, ImageDraw
import skimage.io as io
import json
import numpy as np

# ---------------------------------------------------------------------------
# Path parameters
# ---------------------------------------------------------------------------
# Root directory of the source COCO dataset.
dataDir = 'newdata/'
# Directory under which the generated mask images are stored.
savepath = "newdata/"

# ---------------------------------------------------------------------------
# Dataset parameters
# ---------------------------------------------------------------------------
# COCO has 80 categories; list here the names of the ones to binarize.
# Every category that is not listed is treated as background (black).
# Example: only car, bus and truck are needed.
classes_names = ['car', 'bus', 'truck']
# Dataset splits to process, e.g. val2017, train2017.
# Each split is written to its own sub-directory of <savepath>/masks.
datasets_list = ['val2017']


def mkr(path):
    """Create ``path`` as an empty directory.

    If ``path`` already exists it is removed together with its content
    first. Missing parent directories are created as well.
    """
    if os.path.exists(path):
        shutil.rmtree(path)
    os.makedirs(path)


def mask_generator(coco, width, height, anns_list):
    """Build one binary mask from a list of COCO annotations.

    Args:
        coco: object providing ``annToMask(ann)``, which returns a
            ``(height, width)`` array that is non-zero inside the object.
        width: image width in pixels.
        height: image height in pixels.
        anns_list: annotations to merge into the mask.

    Returns:
        A ``(height, width)`` integer array holding 255 where at least one
        annotation covers the pixel and 0 elsewhere.
    """
    mask_pic = np.zeros((height, width))
    # Summing the per-instance masks marks every covered pixel with a
    # value > 0; overlapping instances simply yield larger values.
    for single in anns_list:
        mask_pic += coco.annToMask(single)
    # Vectorized threshold instead of a per-pixel Python loop.
    return np.where(mask_pic > 0, 255, 0).astype(int)


def get_mask_data(annFile, mask_to_save):
    """Read a COCO annotation file and save one binary mask per image.

    Args:
        annFile: path of the COCO instances JSON file.
        mask_to_save: existing directory that receives the PNG masks. Each
            mask is named after its source image, with a ``.png`` extension.
    """
    coco = COCO(annFile)
    # Category ids of the classes we are interested in.
    classes_ids = coco.getCatIds(catNms=classes_names)

    # Union of the images containing ANY of the categories. (Passing all
    # category ids in a single call would give the intersection instead:
    # only images containing every category.)
    img_ids = set()
    for cat_id in classes_ids:
        img_ids.update(coco.getImgIds(catIds=cat_id))

    # Sorted so that images are processed in a reproducible order.
    image_info_list = coco.loadImgs(sorted(img_ids))

    for imageinfo in image_info_list:
        # Segmentation annotations of the wanted categories in this image.
        annIds = coco.getAnnIds(imgIds=imageinfo['id'],
                                catIds=classes_ids,
                                iscrowd=None)
        anns_list = coco.loadAnns(annIds)
        mask_image = mask_generator(coco,
                                    imageinfo['width'],
                                    imageinfo['height'],
                                    anns_list)
        # splitext copes with extensions of any length (.jpg, .jpeg, ...).
        stem = os.path.splitext(imageinfo['file_name'])[0]
        file_name = os.path.join(mask_to_save, stem + '.png')
        # An explicit gray colormap with a fixed 0..255 range is required:
        # the default colormap would render the mask in false colours
        # instead of black and white.
        plt.imsave(file_name, mask_image, cmap='gray', vmin=0, vmax=255)


def save_json_data(json_file, classes_list, image_info_list, anns_list):
    """Write a COCO-style JSON file holding a subset of a dataset.

    Args:
        json_file: path of the JSON file to write.
        classes_list: entries for the ``categories`` field.
        image_info_list: entries for the ``images`` field.
        anns_list: entries for the ``annotations`` field.
    """
    # Optional descriptive header of the COCO format.
    info = {
        'description': 'COCO 2017 sub Dataset',
        'url': 'https://www.cnblogs.com/lhdb/',
        'version': '1.0',
        'year': 2020,
        'contributor': 'smh',
        'date_created': '2020-7-1 10:06',
    }
    sub_license = {
        'url': 'https://www.cnblogs.com/lhdb/',
        'id': 1,
        'name': 'Attribution-NonCommercial-ShareAlike License',
    }
    # images / annotations / categories are the mandatory fields.
    coco_sub = {
        'info': info,
        'licenses': [sub_license],
        'images': list(image_info_list),
        'type': 'instances',
        'annotations': list(anns_list),
        'categories': list(classes_list),
    }
    # The context manager makes sure the file is flushed and closed.
    with open(json_file, 'w') as json_out:
        json.dump(coco_sub, json_out)


if __name__ == '__main__':
    # Create the common masks root once, WITHOUT wiping it: resetting it for
    # every dataset would delete the masks of the datasets handled earlier.
    masks_root = os.path.join(savepath, 'masks')
    os.makedirs(masks_root, exist_ok=True)

    # Process one dataset split at a time.
    for dataset in datasets_list:
        # Output directory of this split; emptied if it already exists.
        mask_to_save = os.path.join(masks_root, dataset)
        mkr(mask_to_save)
        # Annotation file to process. This is a per-category subset JSON
        # generated beforehand (file name pattern instances_<split>_sub.json).
        annFile = os.path.join(dataDir,
                               'annotations',
                               'instances_{}_sub.json'.format(dataset))
        get_mask_data(annFile, mask_to_save)
        print('Got all the masks of {} from {} ٩( ๑╹ ꇴ ╹)۶'.format(classes_names, dataset))
000000001532.png
000000097924.png
000000121242.png