Yolo系列學習1-Yolov3訓練自己的數據
實現利用yolov3訓練自己的數據集(voc格式)
方法:
1)構建VOC數據集
將你手中的數據集的標註txt修改成voc格式的txt,voc格式如下:
000002.jpg car 44 28 132 121
000003.jpg car 54 19 243 178
000004.jpg car 168 6 298 164
其中第一列為圖片名,第二列為目標類別,最後是目標的包圍框坐標(左上角和右下角坐標)。
批量修改文件名python代碼:
import os


def rename_images(pic_path, start=1):
    """Rename every ``.jpg`` file in *pic_path* to a sequential 8-digit name.

    Files are numbered in sorted-name order, beginning at *start*, and get
    names of the form ``000`` + 5-digit zero-padded counter + ``.jpg``
    (``00000001.jpg``, ``00000002.jpg``, ...). Files without a ``.jpg``
    suffix are left untouched.

    The rename is done in two stages (to temporary names first, then to the
    final names) so that a target name which is already used by a file that
    has not been processed yet can never be overwritten.

    Returns the list of new file names, in numbering order.
    """
    root = os.path.abspath(pic_path)
    # os.listdir order is arbitrary; sort so the numbering is reproducible.
    jpg_names = sorted(name for name in os.listdir(root) if name.endswith(".jpg"))

    # Stage 1: move every image out of the way under a unique temporary name.
    staged = []
    for index, name in enumerate(jpg_names, start):
        final_name = "000" + format(str(index), "0>5") + ".jpg"
        temp_path = os.path.join(root, "__renaming_%d_%s.tmp" % (index, name))
        os.rename(os.path.join(root, name), temp_path)
        staged.append((temp_path, os.path.join(root, final_name)))

    # Stage 2: no ".jpg" is left in the folder, so the final names are free.
    for temp_path, final_path in staged:
        os.rename(temp_path, final_path)

    return [os.path.basename(final_path) for _, final_path in staged]


if __name__ == "__main__":
    rename_images("D:/VOCdevkit/VOC2007/JPEGImages/")
批量合並文件夾內所有txt文件python代碼:
import os

filedir = "D:/DET/"

# Concatenate every per-image annotation file into one train.txt.
# Sorted so the merge order is reproducible.
# NOTE: the result is written to the current working directory, while the
# MATLAB script below reads it from D:/train.txt.
with open("train.txt", "w") as merged:
    for filename in sorted(os.listdir(filedir)):
        with open(os.path.join(filedir, filename)) as part:
            merged.writelines(part)

將該train.txt轉換成voc數據所需要的xml,matlab代碼如下:

clc;
clear;

imgpath = 'D:/VOCdevkit/VOC2007/JPEGImages/';        % folder holding the images
txtpath = 'D:/train.txt';                            % merged annotation txt
xmlpath_new = 'D:/VOCdevkit/VOC2007/Annotations/';   % folder for the final xml files
foldername = 'JPEGImages';
% Image folder prefix written into each <path> element.
% (Renamed from "path", which shadows MATLAB's built-in path function.)
remotepath = '/home/zhangzhi/darknet/scripts/VOCdevkit/VOC2007/JPEGImages/';

% ---- Stage 1: one xml document per image, written to the current folder ----
% Each txt line is "<image> <class> <xmin> <ymin> <xmax> <ymax>"; consecutive
% lines with the same image name are collected into the same document.
fidin = fopen(txtpath, 'r');
lastname = 'begin';
while ~feof(fidin)
    tline = fgetl(fidin);
    if ~ischar(tline) || isempty(strtrim(tline))
        continue;   % skip blank lines (e.g. a trailing newline at end of file)
    end
    str = regexp(tline, ' ', 'split');

    if ~strcmp(str{1}, lastname)
        % First line of a new image: read its size, flush the previous
        % document and start a fresh one.
        filepath = [imgpath, str{1}];
        img = imread(filepath);
        [h, w, d] = size(img);
        copyfile(filepath, 'JPEGImages');
        if exist('Createnode', 'var')
            xmlwrite(strrep(lastname, '.jpg', '.xml'), Createnode);
        end

        Createnode = com.mathworks.xml.XMLUtils.createDocument('annotation');
        Root = Createnode.getDocumentElement;

        header = {'folder', foldername; 'filename', str{1}; 'path', [remotepath, str{1}]};
        for k = 1:size(header, 1)
            node = Createnode.createElement(header{k, 1});
            node.appendChild(Createnode.createTextNode(header{k, 2}));
            Root.appendChild(node);
        end

        source_node = Createnode.createElement('source');
        Root.appendChild(source_node);
        node = Createnode.createElement('database');
        node.appendChild(Createnode.createTextNode('My Database'));
        source_node.appendChild(node);

        size_node = Createnode.createElement('size');
        Root.appendChild(size_node);
        dims = {'width', num2str(w); 'height', num2str(h); 'depth', num2str(d)};
        for k = 1:size(dims, 1)
            node = Createnode.createElement(dims{k, 1});
            node.appendChild(Createnode.createTextNode(dims{k, 2}));
            size_node.appendChild(node);
        end

        node = Createnode.createElement('segmented');
        node.appendChild(Createnode.createTextNode('0'));
        Root.appendChild(node);

        lastname = str{1};
    end

    % Uncomment to preview each box while converting:
    % imshow(img);
    % rectangle('Position',[str2double(str{3}),str2double(str{4}),str2double(str{5})-str2double(str{3}),str2double(str{6})-str2double(str{4})],'LineWidth',4,'EdgeColor','r');
    % pause(0.1);

    % Every line, first or not, contributes one <object> to the current document.
    object_node = Createnode.createElement('object');
    Root.appendChild(object_node);
    attrs = {'name', str{2}; 'pose', 'Unspecified'; 'truncated', '0'; 'difficult', '0'};
    for k = 1:size(attrs, 1)
        node = Createnode.createElement(attrs{k, 1});
        node.appendChild(Createnode.createTextNode(attrs{k, 2}));
        object_node.appendChild(node);
    end

    bndbox_node = Createnode.createElement('bndbox');
    object_node.appendChild(bndbox_node);
    corners = {'xmin', str{3}; 'ymin', str{4}; 'xmax', str{5}; 'ymax', str{6}};
    for k = 1:size(corners, 1)
        node = Createnode.createElement(corners{k, 1});
        node.appendChild(Createnode.createTextNode(corners{k, 2}));
        bndbox_node.appendChild(node);
    end
end
fclose(fidin);

% Flush the document of the last image.
if exist('Createnode', 'var')
    xmlwrite(strrep(lastname, '.jpg', '.xml'), Createnode);
end

% ---- Stage 2: move the xml files to Annotations, dropping the XML declaration ----
file = dir('*.xml');
for i = 1:length(file)
    fold = fopen(file(i).name, 'r');
    fnew = fopen([xmlpath_new file(i).name], 'w');
    isfirstline = true;
    while ~feof(fold)
        tline = fgetl(fold);
        if isfirstline
            isfirstline = false;   % first line is the <?xml ...?> declaration
            continue;
        end
        % xmlwrite indents with three spaces per level; turn each level into a tab.
        % (The pasted original had this pattern collapsed to a single space,
        % which would also rewrite the space inside "My Database".)
        newStr = regexprep(tline, '   ', char(9));
        fprintf(fnew, '%s\n', newStr);
    end
    fprintf('已處理%s\n', file(i).name);
    fclose(fold);
    fclose(fnew);
    delete(file(i).name);
end

生成的xml如下所示

<annotation>
    <folder>JPEGImages</folder>
    <filename>00000000.jpg</filename>
    <path>/home/zhangzhi/darknet/scripts/VOCdevkit/VOC2007/JPEGImages/00000000.jpg</path>
    <source>
        <database>My Database</database>
    </source>
    <size>
        <width>512</width>
        <height>512</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>car</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>277</xmin>
            <ymin>498</ymin>
            <xmax>304</xmax>
            <ymax>511</ymax>
        </bndbox>
    </object>
</annotation>

生成Main中的四個txt(train.txt,val.txt,test.txt,trainval.txt)
txt的內容為沒有後綴名的圖片名稱:
000005
000027
000028
000033
000042
000045
000048
000058
即圖片名字(無後綴),test.txt是測試集,train.txt是訓練集,val.txt是驗證集,trainval.txt是訓練和驗證集。VOC2007中,trainval大概是整個數據集的50%,test也大概是整個數據集的50%;train大概是trainval的50%,val大概是trainval的50%。可參考以下代碼:
%%
% Build trainval.txt, train.txt, val.txt and test.txt for a VOC2007-style
% dataset from the xml annotations that have already been generated.
% trainval is 50% of the whole dataset and test the other 50%;
% train is 50% of trainval and val the other 50%.
% Adjust the percentages to your dataset; with little data, test and val
% can be made smaller.
% Remember to adjust the four values below.
xmlfilepath = 'F:/VOCdevkit/VOC2007/Annotations/';
txtsavepath = 'F:/VOCdevkit/VOC2007/ImageSets/Main/';
trainval_percent = 0.5;   % share of the whole dataset used for trainval; the rest is test
train_percent = 0.5;      % share of trainval used for train; the rest is val
%%
% List only the xml files instead of assuming "." and ".." are the first two
% entries returned by dir.
xmlfile = dir([xmlfilepath '*.xml']);
numOfxml = length(xmlfile);   % total dataset size

trainval = sort(randperm(numOfxml, floor(numOfxml*trainval_percent)));
trainvalsize = length(trainval);
train = sort(trainval(randperm(trainvalsize, floor(trainvalsize*train_percent))));

ftrainval = fopen([txtsavepath 'trainval.txt'], 'w');
ftest = fopen([txtsavepath 'test.txt'], 'w');
ftrain = fopen([txtsavepath 'train.txt'], 'w');
fval = fopen([txtsavepath 'val.txt'], 'w');

for i = 1:numOfxml
    imgname = xmlfile(i).name(1:end-4);   % file name without the ".xml" suffix
    if ismember(i, trainval)
        fprintf(ftrainval, '%s\n', imgname);
        if ismember(i, train)
            fprintf(ftrain, '%s\n', imgname);
        else
            fprintf(fval, '%s\n', imgname);
        end
    else
        fprintf(ftest, '%s\n', imgname);
    end
end

fclose(ftrainval);
fclose(ftrain);
fclose(fval);
fclose(ftest);

整合文件
新建立一個VOC2007文件夾,在該文件夾下面新建JPEGImages,Annotations,labels,ImageSets文件夾,將所有訓練的圖片均放置在JPEGImages文件夾下,將第二步生成的xml文件放置在Annotations文件夾中,在ImageSets下新建Main文件夾,將第三步生成的四個txt放入其中,將下面步驟生成的文件放置於labels文件夾中
上面步驟的代碼均是在Windows下使用,下面代碼在Ubuntu下使用。生成labels文件:
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
# Edit for your dataset: (year, image_set) pairs to convert. Each pair is read
# from VOCdevkit/VOC<year>/ImageSets/Main/<image_set>.txt.
# Stock VOC setup, kept for reference:
# sets = [('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
sets = [('2007', 'train'), ('2007', 'val'), ('2007', 'test')]

# Edit for your dataset: class names; the list index becomes the class id
# written to the label files. Names must match the <name> text in the xml
# exactly (no stray whitespace), otherwise the object is skipped.
# Stock VOC classes, kept for reference:
# classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
classes = ["car", "van", "truck", "bus"]
def convert(size, box):
    """Convert a pixel bounding box to normalised centre/size form.

    size: (width, height) of the image in pixels.
    box:  (xmin, xmax, ymin, ymax) in pixels -- note the x pair comes first.

    Returns (x, y, w, h): the box centre and box extent, each divided by the
    image width (x, w) or image height (y, h).
    """
    dw = 1. / size[0]
    dh = 1. / size[1]
    x = (box[0] + box[1]) / 2.0
    y = (box[2] + box[3]) / 2.0
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = x * dw
    w = w * dw
    y = y * dh
    h = h * dh
    return (x, y, w, h)
def convert_annotation(year, image_id):
    """Write the label file for one image from its VOC xml annotation.

    Reads  VOCdevkit/VOC<year>/Annotations/<image_id>.xml
    Writes VOCdevkit/VOC<year>/labels/<image_id>.txt

    One line is written per kept object:
    "<class id> <x> <y> <w> <h>", where the class id is the object's index in
    the module-level ``classes`` list and the four numbers come from
    ``convert``. Objects whose name is not in ``classes`` or whose
    ``difficult`` flag is 1 are skipped.
    """
    xml_path = 'VOCdevkit/VOC%s/Annotations/%s.xml' % (year, image_id)
    label_path = 'VOCdevkit/VOC%s/labels/%s.txt' % (year, image_id)

    # Parse first so a broken xml does not leave an empty label file behind.
    with open(xml_path) as in_file:
        root = ET.parse(in_file).getroot()

    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    with open(label_path, 'w') as out_file:
        for obj in root.iter('object'):
            difficult = obj.find('difficult').text
            cls = obj.find('name').text
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # convert() expects (xmin, xmax, ymin, ymax).
            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
# Absolute image paths are written to the list files, based on the directory
# the script is run from.
wd = getcwd()

for year, image_set in sets:
    labels_dir = 'VOCdevkit/VOC%s/labels/' % (year)
    if not os.path.exists(labels_dir):
        os.makedirs(labels_dir)

    with open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt' % (year, image_set)) as ids_file:
        image_ids = ids_file.read().strip().split()

    # <year>_<image_set>.txt lists one image path per line; the matching
    # label file is generated for each image along the way.
    with open('%s_%s.txt' % (year, image_set), 'w') as list_file:
        for image_id in image_ids:
            list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n' % (wd, year, image_id))
            convert_annotation(year, image_id)

# To train on the train and val data together, merge the list files:
os.system("cat 2007_train.txt 2007_val.txt > train.txt")
os.system("cat 2007_train.txt 2007_val.txt 2007_test.txt > train.all.txt")
2)修改yolov3的相關文件
修改cfg/voc.data文件,進行如下修改:
# Comments are kept on their own lines: darknet takes everything after "="
# as the option value, so a trailing "# ..." would become part of the path.
# Number of classes in your own dataset
classes= 4
# Path to the training image list
train = /home/zhangzhi/darknet/VOCdevkit/2007_train.txt
# Path to the test image list
valid = /home/zhangzhi/darknet/VOCdevkit/2007_test.txt
names = data/voc.names
backup = backup
修改data/voc.names文件,對應自己的數據集修改類別。
car
van
truck
bus
下載Imagenet上預先訓練的權重
wget https://pjreddie.com/media/files/darknet53.conv.74
修改cfg/yolov3-voc.cfg
找到文件中類似的部分進行修改,共有3處:
[convolutional]
size=1
stride=1
pad=1
# CHANGE: filters = 3 * (classes + 1 + 4); 27 for 4 classes
filters=27
activation=linear
[yolo]
mask = 0,1,2
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
# CHANGE: number of classes in your own dataset
classes=4
num=9
jitter=.3
ignore_thresh = .5
truth_thresh = 1
random=1
需要將filters改為 num/3*(classes+1+4),即 3*(classes+1+4)(本例中 classes=4,所以 filters=3*(4+1+4)=27),參考https://github.com/pjreddie/darknet/issues/582,同時需要將下面[yolo]中的classes修改為自己數據集的類別數。
3)訓練,測試
./darknet detector train cfg/voc.data cfg/yolov3-voc.cfg darknet53.conv.74
./darknet detector test cfg/voc.data cfg/yolov3-voc.cfg backup/yolov3-voc_final.weights data/dog.jpg
Yolo系列學習1-Yolov3訓練自己的數據