1. 程式人生 > >目標檢測資料擴增

目標檢測資料擴增

import os
import cv2
import xml.dom.minidom
from xml.dom.minidom import Document
import math
 
# Collect the full path of every file under a directory tree (used to locate input files).
def GetFileFromThisRootDir(dir, ext=None):
    """Return the paths of all files found beneath ``dir``.

    Args:
        dir: Root directory; it is walked recursively with ``os.walk``.
        ext: Optional extension filter, written without the leading dot.
            Either a single string (``'jpg'``) or a collection of strings
            (``['jpg', 'png']``).  ``None`` disables filtering.

    Returns:
        A list of file paths, in the order ``os.walk`` yields them.
    """
    if ext is None:
        wanted = None
    elif isinstance(ext, str):
        # A bare string is one extension.  Testing ``extension in 'jpg'``
        # would be a substring check, letting through files with no
        # extension ('' in 'jpg') or a partial one ('jp' in 'jpg').
        wanted = {ext}
    else:
        wanted = set(ext)

    allfiles = []
    for root, _dirs, files in os.walk(dir):
        for name in files:
            filepath = os.path.join(root, name)
            # splitext keeps the dot ('.jpg'); drop it before comparing.
            extension = os.path.splitext(filepath)[1][1:]
            if wanted is None or extension in wanted:
                allfiles.append(filepath)
    return allfiles

# Rotate an image; `angle` is expressed in degrees.
def im_rotate(im,angle,center = None,scale = 1.0):
    """Return ``im`` rotated by ``angle`` degrees on a canvas of the same size.

    Args:
        im: Image array; only ``im.shape[:2]`` (rows, columns) is read here.
        angle: Rotation angle in degrees, passed to ``cv2.getRotationMatrix2D``.
        center: ``(x, y)`` rotation centre; defaults to the image midpoint.
        scale: Scale factor passed to ``cv2.getRotationMatrix2D``.

    Returns:
        The warped image; the output size handed to ``cv2.warpAffine`` is the
        input's (width, height).
    """
    rows, cols = im.shape[:2]
    pivot = (cols/2,rows/2) if center is None else center
    affine = cv2.getRotationMatrix2D(pivot,angle,scale)
    return cv2.warpAffine(im,affine,(cols,rows))


# Read an annotation XML file; `xmlfile` is the path to the XML.
def readXml(xmlfile):
    """Parse the image size and object boxes out of an annotation XML file.

    Args:
        xmlfile: Path (or file object) accepted by ``xml.dom.minidom.parse``.

    Returns:
        ``(bboxes, width, height, depth)`` where every entry of ``bboxes`` is
        ``[xmin, ymin, xmax, ymax, classname]``.  Box coordinates are parsed
        as floats and truncated to ints; the size fields are parsed as ints.
    """
    def first_text(node, tag):
        # Data of the first child node of the first <tag> element under `node`.
        return node.getElementsByTagName(tag)[0].childNodes[0].data

    root = xml.dom.minidom.parse(xmlfile).documentElement

    size_node = root.getElementsByTagName('size')[0]
    height = int(first_text(size_node, 'height'))
    width = int(first_text(size_node, 'width'))
    depth = int(first_text(size_node, 'depth'))

    bboxes = []
    for obj in root.getElementsByTagName('object'):
        class_label = first_text(obj, 'name')
        box_node = obj.getElementsByTagName('bndbox')[0]
        # Box layout: [xmin, ymin, xmax, ymax, classname]
        coords = [int(float(first_text(box_node, tag)))
                  for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        bboxes.append(coords + [class_label])
    return bboxes,width,height,depth


def _clamp_to_image(value, upper):
    """Clamp one rotated coordinate: values at or below 0 become 1.0, values above ``upper`` become ``upper``."""
    if int(value) <= 0:
        value = 1.0
    if int(value) > upper:
        value = upper
    return value


def _rotate_box(gt, cos_t, sin_t, xc, yc, w, h):
    """Return ``[xmin, ymin, xmax, ymax, classname]`` enclosing ``gt`` after rotation about ``(xc, yc)``."""
    xs = []
    ys = []
    # Corners in the order: top-left, top-right, bottom-left, bottom-right.
    for x, y in ((gt[0], gt[1]), (gt[2], gt[1]), (gt[0], gt[3]), (gt[2], gt[3])):
        dx = x - xc
        dy = yc - y  # image y grows downwards; flip it for the rotation formula
        xs.append(_clamp_to_image(dx * cos_t - dy * sin_t + xc, w - 1))
        ys.append(_clamp_to_image(yc - dx * sin_t - dy * cos_t, h - 1))
    return [min(xs), min(ys), max(xs), max(ys), str(gt[4])]


def rotate_image(angles, angle_rad, imgs_path, anno_new_path,
                 src_anno_dir=None, out_img_dir=None):
    """Rotate every image by every angle and write matching annotation XMLs.

    For each image and each angle, the rotated picture is saved as
    ``P<angle>_<name>.jpg`` and an annotation ``P<angle>_<name>.xml`` is
    written whose boxes are the axis-aligned hulls of the rotated originals.

    Args:
        angles: Rotation angles in degrees (used for ``im_rotate`` and names).
        angle_rad: The same angles in radians, index-aligned with ``angles``.
        imgs_path: Iterable of source image paths.
        anno_new_path: Directory that receives the new XML files.
        src_anno_dir: Directory holding the source ``<name>.xml`` files.
            Defaults to the module-level ``anno_path``.
        out_img_dir: Directory that receives the rotated images.
            Defaults to the module-level ``pro_dir``.

    Notes:
        * Files that ``cv2.imread`` cannot decode are skipped with a message
          instead of crashing later on ``None``.
        * The XML is written once per rotated image, after all boxes have
          been transformed.  An image without objects therefore still gets
          an (object-free) annotation, keeping images and XMLs paired.
    """
    if src_anno_dir is None:
        src_anno_dir = anno_path
    if out_img_dir is None:
        out_img_dir = pro_dir

    j = 0  # number of rotated images written so far
    for img_path in imgs_path:
        im = cv2.imread(img_path)
        if im is None:
            print('skip unreadable image: %s' % img_path)
            continue

        # Source image name without directory or extension.
        file_name = os.path.basename(os.path.splitext(img_path)[0])
        # The source annotation does not depend on the angle: read it once.
        gts, w, h, d = readXml(os.path.join(src_anno_dir, '%s.xml' % file_name))
        xc, yc = float(w) / 2, float(h) / 2

        for i, angle in enumerate(angles):
            theta = angle_rad[i]
            cos_t, sin_t = math.cos(theta), math.sin(theta)

            im_rot = im_rotate(im, angle)
            # Height, width and depth of the rotated image, recorded in the XML.
            (H, W, D) = im_rot.shape

            # New naming scheme: 'P' + angle + '_' + original name.
            new_name = 'P%s_%s' % (angle, file_name)
            cv2.imwrite(os.path.join(out_img_dir, new_name + '.jpg'), im_rot)

            gt_new = [_rotate_box(gt, cos_t, sin_t, xc, yc, w, h) for gt in gts]
            writeXml(anno_new_path, new_name, W, H, D, gt_new)

            j += 1
            if j % 100 == 0:
                print('----%s----' % j)
            
            
# Write an annotation XML file. `tmp` is the output directory and `imgname`
# the file name; it is used verbatim for <filename> and for "<imgname>.xml".
def writeXml(tmp, imgname, w, h, d, bboxes):
    """Build a VOC-style annotation document and save it as ``tmp/<imgname>.xml``.

    Args:
        tmp: Output directory (joined to the file name with ``/``).
        imgname: Text of the ``<filename>`` element and stem of the XML file.
        w, h, d: Values written to ``<width>``, ``<height>`` and ``<depth>``.
        bboxes: Iterable of ``[xmin, ymin, xmax, ymax, classname]``; each
            coordinate is truncated to int and written as a float string.

    Returns:
        None.  Each box's coordinates are also printed to stdout.
    """
    doc = Document()

    def branch(parent, tag):
        # Append an empty <tag> element to `parent` and return it.
        node = doc.createElement(tag)
        parent.appendChild(node)
        return node

    def leaf(parent, tag, text):
        # Append <tag>text</tag> to `parent`.
        node = branch(parent, tag)
        node.appendChild(doc.createTextNode(text))
        return node

    annotation = branch(doc, 'annotation')
    leaf(annotation, 'folder', "VOC2007")
    leaf(annotation, 'filename', imgname)

    # <source> section
    source = branch(annotation, 'source')
    leaf(source, 'database', "My Database")
    leaf(source, 'annotation', "VOC2007")
    leaf(source, 'image', "flickr")

    # <owner> section
    owner = branch(annotation, 'owner')
    leaf(owner, 'flickrid', "NULL")
    leaf(owner, 'name', "idannel")

    # <size> section
    size = branch(annotation, 'size')
    leaf(size, 'width', str(w))
    leaf(size, 'height', str(h))
    leaf(size, 'depth', str(d))

    leaf(annotation, 'segmented', "0")

    # One <object> per bounding box.
    for bbox in bboxes:
        obj = branch(annotation, "object")
        leaf(obj, 'name', str(bbox[4]))
        leaf(obj, 'pose', "Unspecified")
        leaf(obj, 'truncated', "0")
        leaf(obj, 'difficult', "0")

        bndbox = branch(obj, 'bndbox')
        for idx, tag in enumerate(('xmin', 'ymin', 'xmax', 'ymax')):
            leaf(bndbox, tag, str(float(int(bbox[idx]))))

        print(float(int(bbox[0])),float(int(bbox[1])),float(int(bbox[2])),float(int(bbox[3])))

    out_path = tmp +"/%s.xml"%imgname
    with open(out_path, 'wb') as f:
        f.write(doc.toprettyxml(indent='\t', encoding='utf-8'))
    return


if __name__ == '__main__':
    # Dataset root; images and their source annotations live underneath it.
    # NOTE: rotate_image reads `anno_path` and `pro_dir` as module-level
    # names, so those two variables must keep their names.
    root = 'H:/data/ship'
    img_dir = root + '/images'
    anno_path = root + '/xml'
    # Full path of every original image.
    imgs_path = GetFileFromThisRootDir(img_dir)

    # Output locations: the new annotations and the rotated images.
    anno_new_path = root + '/NewAnnotations'
    pro_dir = root+'/train_translate_scale_rotate/'
    for out_dir in (anno_new_path, pro_dir):
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

    # Rotation angles in degrees (what im_rotate expects); a positive value
    # rotates counter-clockwise.
    angles = [5,90,180,270,355]
    # The trigonometric functions need the same angles in radians.
    angle_rad = [deg*math.pi/180.0 for deg in angles]

    # Run the augmentation.
    rotate_image(angles, angle_rad, imgs_path, anno_new_path)