目標檢測資料擴增

阿新 • • 發佈：2018-12-09

import os
import cv2
import xml.dom.minidom
from xml.dom.minidom import Document
import math
 
def GetFileFromThisRootDir(dir,ext = None):
    """Recursively collect the full paths of all files below a directory.

    Args:
        dir: Root directory to walk. (The name shadows the builtin but is
            kept so existing keyword callers keep working.)
        ext: Optional extension filter, given without the leading dot.
            Either a single string such as ``'jpg'`` or a collection of
            strings such as ``['jpg', 'png']``. ``None`` (the default)
            disables filtering and returns every file.

    Returns:
        A list of file paths, in the order ``os.walk`` yields them.
    """
    if ext is None:
        wanted = None
    elif isinstance(ext, str):
        # A bare string must be compared as a whole extension. Using ``in``
        # on the string itself is a substring test, which would also accept
        # files without any extension ('' is a substring of everything) and
        # partial matches such as 'j' or 'pg' for 'jpg'.
        wanted = (ext,)
    else:
        wanted = ext

    allfiles = []
    for root, _dirs, files in os.walk(dir):
        for name in files:
            filepath = os.path.join(root, name)
            # splitext keeps the dot; drop it before comparing.
            extension = os.path.splitext(filepath)[1][1:]
            if wanted is None or extension in wanted:
                allfiles.append(filepath)
    return allfiles

def im_rotate(im,angle,center = None,scale = 1.0):
    """Rotate an image about a point while keeping the original canvas size.

    Args:
        im: Image array; the first two entries of ``im.shape`` are read as
            (height, width).
        angle: Rotation angle in degrees, forwarded to
            ``cv2.getRotationMatrix2D``.
        center: ``(x, y)`` rotation centre. Defaults to ``(w/2, h/2)``.
        scale: Scale factor forwarded to ``cv2.getRotationMatrix2D``.

    Returns:
        The output of ``cv2.warpAffine`` for an output size of ``(w, h)``.
    """
    height, width = im.shape[:2]
    pivot = (width/2, height/2) if center is None else center
    matrix = cv2.getRotationMatrix2D(pivot, angle, scale)
    return cv2.warpAffine(im, matrix, (width, height))


def readXml(xmlfile):
    """Read the image size and bounding boxes from a VOC-style XML file.

    Args:
        xmlfile: Path of the XML annotation file.

    Returns:
        A tuple ``(bboxes, width, height, depth)``. ``bboxes`` is a list of
        ``[xmin, ymin, xmax, ymax, classname]`` entries whose coordinates
        are parsed as floats and truncated to ``int``; ``width``, ``height``
        and ``depth`` are the integers found under the ``<size>`` element.
    """
    def first_text(node, tag):
        # Data of the first child node of the first <tag> below ``node``.
        return node.getElementsByTagName(tag)[0].childNodes[0].data

    root = xml.dom.minidom.parse(xmlfile).documentElement

    size_node = root.getElementsByTagName('size')[0]
    height = int(first_text(size_node, 'height'))
    width = int(first_text(size_node, 'width'))
    depth = int(first_text(size_node, 'depth'))

    bboxes = []
    for obj in root.getElementsByTagName('object'):
        class_label = first_text(obj, 'name')
        box_node = obj.getElementsByTagName('bndbox')[0]
        # Coordinates may be written as floats; truncate them to int.
        corners = [int(float(first_text(box_node, tag)))
                   for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        # Box layout: [xmin, ymin, xmax, ymax, classname]
        bboxes.append(corners + [class_label])
    return bboxes, width, height, depth


def _rotate_point(x, y, xc, yc, theta):
    """Map (x, y) through a rotation of ``theta`` radians about (xc, yc).

    The formula is the one the original per-corner code used; the vertical
    offset is taken as ``yc - y`` because image rows grow downwards.
    """
    dx = x - xc
    dy = yc - y
    new_x = dx*math.cos(theta) - dy*math.sin(theta) + xc
    new_y = yc - dx*math.sin(theta) - dy*math.cos(theta)
    return new_x, new_y


def _clamp_coord(value, limit):
    """Pull a coordinate back inside an image side of length ``limit``.

    Values whose integer part is <= 0 become 1.0; values whose integer part
    exceeds ``limit - 1`` become ``limit - 1``.
    """
    if int(value) <= 0:
        value = 1.0
    if int(value) > limit - 1:
        value = limit - 1
    return value


def rotate_image(angles, angle_rad, imgs_path, anno_new_path):
    """Write rotated copies of every image together with rotated annotations.

    For each image and each angle, the rotated image is saved as
    ``P<angle>_<name>.jpg`` and, when the source annotation contains at
    least one box, a matching ``P<angle>_<name>.xml`` is written. Each new
    box is the axis-aligned bounding rectangle of the four rotated (and
    clamped) corners of the original box.

    This function reads two module-level names that the caller must have
    defined: ``pro_dir`` (output directory for rotated images) and
    ``anno_path`` (directory holding the source ``<name>.xml`` files).

    Args:
        angles: Rotation angles in degrees, passed to ``im_rotate``.
        angle_rad: The same angles in radians, parallel to ``angles``.
        imgs_path: Iterable of source image paths.
        anno_new_path: Directory that receives the new XML files.
    """
    j = 0  # number of rotated images produced so far (progress output only)
    for img_path in imgs_path:
        im = cv2.imread(img_path)
        if im is None:
            # cv2.imread reports an unreadable / non-image file by returning
            # None; skip it instead of crashing on ``im.shape`` later.
            print('skip unreadable image: %s' % img_path)
            continue

        # Everything below depends only on the image, not on the angle, so
        # it is computed once per image.
        file_name = os.path.basename(os.path.splitext(img_path)[0])
        anno = os.path.join(anno_path, '%s.xml' % file_name)
        gts, w, h, _d = readXml(anno)
        xc, yc = float(w)/2, float(h)/2  # rotation centre used for the boxes

        for angle, theta in zip(angles, angle_rad):
            im_rot = im_rotate(im, angle)
            # Size of the rotated image, recorded in the new XML.
            (H, W, D) = im_rot.shape
            # New naming scheme: 'P' + angle + '_' + original name.
            new_name = 'P%s_%s' % (angle, file_name)
            cv2.imwrite(os.path.join(pro_dir, '%s.jpg' % new_name), im_rot)

            gt_new = []
            for gt in gts:
                # Rotate the four corners of [xmin, ymin, xmax, ymax].
                corners = [_rotate_point(x, y, xc, yc, theta)
                           for y in (gt[1], gt[3])
                           for x in (gt[0], gt[2])]
                xs = [_clamp_coord(px, w) for px, _py in corners]
                ys = [_clamp_coord(py, h) for _px, py in corners]
                gt_new.append([min(xs), min(ys), max(xs), max(ys), str(gt[4])])

            # Write the annotation once per rotated image rather than once
            # per box. As before, no XML is produced when there are no boxes.
            if gt_new:
                writeXml(anno_new_path, new_name, W, H, D, gt_new)

            j = j+1
            if j%100==0 : print ('----%s----'%j)
            
            
def writeXml(tmp, imgname, w, h, d, bboxes):
    """Write a VOC-style annotation file to ``<tmp>/<imgname>.xml``.

    Args:
        tmp: Output directory.
        imgname: File name used both for the ``<filename>`` element and for
            the XML file itself (``.xml`` is appended).
        w: Value written to ``<width>``.
        h: Value written to ``<height>``.
        d: Value written to ``<depth>``.
        bboxes: Iterable of ``[xmin, ymin, xmax, ymax, classname]`` entries.
            Coordinates are truncated to ``int`` and written as floats
            (for example ``'12.0'``).

    Each box's four written coordinates are also printed to stdout.
    """
    doc = Document()

    def add_element(parent, tag):
        # Create <tag>, attach it to ``parent`` and hand it back.
        node = doc.createElement(tag)
        parent.appendChild(node)
        return node

    def add_text_element(parent, tag, text):
        # Create <tag>text</tag> below ``parent``.
        node = add_element(parent, tag)
        node.appendChild(doc.createTextNode(text))
        return node

    annotation = add_element(doc, 'annotation')
    add_text_element(annotation, 'folder', "VOC2007")
    add_text_element(annotation, 'filename', imgname)

    # <source> section
    source = add_element(annotation, 'source')
    add_text_element(source, 'database', "My Database")
    add_text_element(source, 'annotation', "VOC2007")
    add_text_element(source, 'image', "flickr")

    # <owner> section
    owner = add_element(annotation, 'owner')
    add_text_element(owner, 'flickrid', "NULL")
    add_text_element(owner, 'name', "idannel")

    # <size> section
    size = add_element(annotation, 'size')
    add_text_element(size, 'width', str(w))
    add_text_element(size, 'height', str(h))
    add_text_element(size, 'depth', str(d))

    add_text_element(annotation, 'segmented', "0")

    # One <object> per bounding box.
    for bbox in bboxes:
        obj = add_element(annotation, "object")
        add_text_element(obj, 'name', str(bbox[4]))
        add_text_element(obj, 'pose', "Unspecified")
        add_text_element(obj, 'truncated', "0")
        add_text_element(obj, 'difficult', "0")

        bndbox = add_element(obj, 'bndbox')
        # Truncate to int, then store as float text.
        coords = [float(int(bbox[k])) for k in range(4)]
        for tag, value in zip(('xmin', 'ymin', 'xmax', 'ymax'), coords):
            add_text_element(bndbox, tag, str(value))

        print(coords[0], coords[1], coords[2], coords[3])

    xml_path = tmp +"/%s.xml"%imgname
    with open(xml_path, 'wb') as out:
        out.write(doc.toprettyxml(indent='\t', encoding='utf-8'))


if __name__ == '__main__':
    # Dataset root and its input sub-directories. ``anno_path`` is read as
    # a module-level name by rotate_image.
    root = 'H:/data/ship'
    img_dir = root + '/images'
    anno_path = root + '/xml'
    # Full path of every file found under the image directory.
    imgs_path = GetFileFromThisRootDir(img_dir)

    # Output locations: new annotations, and rotated images. ``pro_dir`` is
    # read as a module-level name by rotate_image.
    anno_new_path = root + '/NewAnnotations'
    pro_dir = root+'/train_translate_scale_rotate/'
    for out_dir in (anno_new_path, pro_dir):
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

    # Rotation angles; a positive value means counter-clockwise.
    # im_rotate takes degrees, the trigonometric functions take radians.
    angles = [5,90,180,270,355]
    angle_rad = [deg*math.pi/180.0 for deg in angles]

    # Run the augmentation.
    rotate_image(angles, angle_rad, imgs_path, anno_new_path)

目標檢測資料擴增

import os import cv2 import xml.dom.minidom from xml.dom.minidom import Document import math #獲取路徑下所有檔案的完整路徑，用於讀取檔案用 def GetFileFromThis

實踐目標檢測--資料準備

環境 Windows10+Python3.6 實踐內容手勢識別（識別1，2，3，4，5的手勢）構造資料集 1、錄製視訊，用於資料集收集 2、使用opencv的方法擷取視訊幀 import cv2 vc = cv2.VideoCapture('shoushi3.

製作自己的目標檢測資料集再利用tf-faster-rcnn訓練

1.製作資料集的工具我利用資料集標註工具是精靈標註助手，我認為很好用。奉勸一句標註資料集時不要隨便找人給你標，如果他沒有用心給你標註，你在後面訓練會出現很多問題。在一開始標註資料集的時候不要一下子給他標註完，先標註幾十張圖片，然後用在你的網路裡，看看有沒有錯誤，如果沒有問

深度學習-目標檢測資料集以及評估指標

資料集和效能指標目標檢測常用的資料集包括PASCAL VOC，ImageNet，MS COCO等資料集，這些資料集用於研究者測試演算法效能或者用於競賽。目標檢測的效能指標要考慮檢測物體的位置以及預測類別的準確性，下面我們會說到一些常用的效能評估指標。資料集 PA

航空遙感影象(Aerial Images)目標檢測資料集彙總

常規目標檢測資料集有很多，現在前沿的目標檢測演算法（如Faster R-CNN, Yolo, SSD, Mask R-CNN等）基本都是在這些常規資料集上實驗的，但是，基於常規資料集訓練的分類器，在航空遙感影象上的檢測效果並不好，主要原因是航空遙感影象有其特殊性： 1，尺度

目標檢測資料集

1. Pascal VOC Dataset：資料集共 21 分類（VOC 2012、VOC 2007）。 2. ILSVRC2012，部分下載連結： 3. COCO 資料集（微軟建立，要翻牆），下載官網：http://cocodataset.org/

關於coco目標檢測資料集格式

coco目標檢測資料集標註目標資訊採用的是資料格式是json，其內容本質是一種字典結構，字典堆疊和列表資訊內容維護。coco裡面的id和類名字對應：總共80類，但id號到90！coco_id_name_map={1: 'person', 2: 'bicycle', 3: 'c

實踐目標檢測--讀取資料集

描述對於普通的影象分類，label只用表示圖片的類別就行，而目標檢測，不僅僅包括了類別的判斷，還包含了類別的位置資訊。所以在神經網路的構造上和資料的讀取上都大不相同。深度學習框架選用 MxNet Gluon，經過個人的對比，對於單GPU或CPU，MxNet的速度和記憶體佔用要遠優於T

影象分類和目標檢測常用資料集介紹

The Caltech-UCSD birds-200-2011 dataset（加利福尼亞理工學院鳥類資料集）：分類數量：200 圖片數量：11,788 每個影象的註釋：15個部分位置，312個二進位制屬性，1邊界框 Labeled faces in the wild： L

SSD-Tensorflow 目標檢測（自定義資料集（VOC2007格式））

一、準備搭建SSD框架，下載解壓即可下載pascalvoc資料，自己的資料根據voc格式改寫（圖片的名稱，不用拘泥於6位數字，其他命名也可以）資料集下載點選解壓後不要混合在一個資料夾下 VOCtrainval用來訓練，VOCtest用來測試。 VOCtrai

TX2實現yolov2(目標檢測,計數，訓練自己的資料集）

git clone https://github.com/pjreddie/darknet cd darknet 2.修改Makefile 我們開啟MakeFile檔案： nano Makefile Makefile內容如下(我們

YOLOv2目標檢測_單目標_訓練自己資料全過程（自用）

1. 製作符合要求的VOC資料集目標：製作如下格式的資料夾格式： --VOC2017（大寫字母+數字） --Annotations（存放儲存標註資訊的xml） --ImageSets --Main（存放儲存圖片名的train.txttest.txt） --Layo

目標檢測之三 Win10 +VS2015+雙gtx Titan X使用YOLO v2訓練VOC資料

Win10 +VS2015+雙gtx Titan X使用YOLO v2訓練VOC資料本文主要是記錄在win10下，yolo v2訓練資料的一整個過程，網上很多是在linux下的，不得不說這個東西還是linux下方便啊，主要依賴項： VS2015或者VS2013都

caffe深度學習【九】目標檢測 yolo v1的caffe實現基於VOC2007資料集

YOLO v1演算法原文的作者是在darknet框架下實現的，原文作者的實現，這裡主要講的是caffe版本的YOLO實現，主要採用yeahkun寫的：點選開啟連結其實只是步驟相對來說有點繁瑣，但是要跑通並不困難：大致步驟包括： 1、編譯ca

gluoncv 目標檢測，訓練自己的資料集

https://gluon-cv.mxnet.io/build/examples_datasets/detection_custom.html 官方提供兩種方案，一種是lst檔案，一種是xml檔案（voc的格式）； voc 格式的標註有標註工具，但是你如果是json檔案標註的資訊，或者其他格式的，你就要轉

gluoncv 訓練自己的資料集，進行目標檢測

跑了一晚上的模型，實在佔GPU資源，這兩天已經有很多小朋友說我了。我選擇了其中一個引數。 https://github.com/dmlc/gluon-cv/blob/master/scripts/detection/faster_rcnn/train_faster_rcnn.py train_faster

目標檢測演算法SSD_訓練自有資料

基於Caffe框架的目標檢測演算法SSD–進行自有資料替換並訓練主要步驟：標籤資料預處理圖片資料預處理資料及標籤的VOC格式以及LMDB格式轉化網路訓練其他問題分析 1.標籤資料預處理由於拿到的是YOLOv3格式的訓練集label，

【FPN車輛目標檢測】資料集獲取以及Windows7+TensorFlow+Faster-RCNN+FPN程式碼環境配置和執行過程實測

PS 最近在學目標檢測想用最新的FPN網路，剛好看到這篇部落格https://blog.csdn.net/Angela_qin/article/details/80944604嘗試把它復現，說的小白一點。 1.資料集獲取博主只說是車輛目標檢測沒將資料集在哪裡獲取。我在程式碼中發現E:/st

目標檢測訓練資料增廣--旋轉+尺度+顏色+裁剪

原文連結：https://blog.csdn.net/wei_guo_xd/article/details/74199729常用的影象擴充方式有：水平翻轉，裁剪，視角變換，jpeg壓縮，尺度變換，顏色變換，旋轉當用於分類資料集時，這些變換方法可以全部被使用，然而考慮到目標檢測

自有資料集上,如何用keras最簡單訓練YOLOv3目標檢測

qqwweee/keras-yolo3是最簡單的自資料訓練yolov3的開源專案了。非常簡單，相比其他的開源專案，太適合新手練習yolov3。而公開的很多開源框架的都是基於VOC/COCO來寫預訓練，整理資料起麻煩不少。本來筆者看到mxnet/gluoncv有yolov3的自訓練，而

目標檢測資料擴增

相關推薦