1. 程式人生 > 其它 >detectron2訓練自己的資料集_目標檢測基於kerasYoloV3訓練自己的資料集

detectron2訓練自己的資料集_目標檢測基於kerasYoloV3訓練自己的資料集

技術標籤:detectron2訓練自己的資料集mask rcnn訓練自己的資料集maskrcnn訓練自己的資料集mmdetection訓練自己的資料ssd目標檢測訓練自己的資料yolov3資料集下載訓練

資料準備

  1. 首先按照voc格式提前準備好資料夾,樣例如下:

    2f3bffd8616a477e2ec9a662ce437920.png

  2. 生成訓練集測試集,train.txt,test.txt

import os
import random

# Fraction of annotation files reserved for the test split; the rest go to train.
test_percent = 0.1  # adjust to taste
train_percent = 0.9
xml_dir = 'Annotations'
txt_save_dir = 'ImageSets/Main'  # forward slashes: portable across OSes
total_xml = os.listdir(xml_dir)

num = len(total_xml)
indices = range(num)  # renamed: the original shadowed the builtin `list`
num_test = int(num * test_percent)
# Use a set so the per-file membership test below is O(1) instead of O(n).
test_indices = set(random.sample(indices, num_test))

# Context managers guarantee the two files are flushed and closed.
with open('ImageSets/Main/test.txt', 'w') as ftest, \
        open('ImageSets/Main/train.txt', 'w') as ftrain:
    for i in indices:
        # Strip the ".xml" extension; keep one image id per line.
        name = total_xml[i][:-4] + '\n'
        if i in test_indices:
            ftest.write(name)
        else:
            ftrain.write(name)

3.定義標籤名稱,fish_class.txt

human_face
fish

4.定製屬於自己資料集anchor
這裡主要用kmeans對自己的資料進行聚類所得,具體原理,以後再詳細講

import numpy as np


class YOLO_Kmeans:
    """K-means clustering of ground-truth box sizes (w, h) into anchor boxes.

    Distance metric is 1 - IoU between a box and a cluster centroid, which is
    the standard YOLO anchor-generation procedure.
    """

    def __init__(self, cluster_number, filename):
        # Number of anchors to produce (9 for YOLOv3, 6 for tiny-YOLOv3).
        self.cluster_number = cluster_number
        # BUG FIX: the original ignored the argument and hard-coded
        # "2012_train.txt"; honor the caller-supplied annotation file.
        self.filename = filename

    def iou(self, boxes, clusters):  # 1 box -> k clusters
        """Return an (n, k) matrix of IoU between n boxes and k clusters.

        Boxes and clusters are (w, h) pairs assumed to share a common corner,
        so the intersection is min(w)*min(h).
        """
        n = boxes.shape[0]
        k = self.cluster_number

        box_area = boxes[:, 0] * boxes[:, 1]
        box_area = box_area.repeat(k)
        box_area = np.reshape(box_area, (n, k))

        cluster_area = clusters[:, 0] * clusters[:, 1]
        cluster_area = np.tile(cluster_area, [1, n])
        cluster_area = np.reshape(cluster_area, (n, k))

        box_w_matrix = np.reshape(boxes[:, 0].repeat(k), (n, k))
        cluster_w_matrix = np.reshape(np.tile(clusters[:, 0], (1, n)), (n, k))
        min_w_matrix = np.minimum(cluster_w_matrix, box_w_matrix)

        box_h_matrix = np.reshape(boxes[:, 1].repeat(k), (n, k))
        cluster_h_matrix = np.reshape(np.tile(clusters[:, 1], (1, n)), (n, k))
        min_h_matrix = np.minimum(cluster_h_matrix, box_h_matrix)
        inter_area = np.multiply(min_w_matrix, min_h_matrix)

        result = inter_area / (box_area + cluster_area - inter_area)
        return result

    def avg_iou(self, boxes, clusters):
        """Mean best-IoU over all boxes — the clustering quality metric."""
        accuracy = np.mean([np.max(self.iou(boxes, clusters), axis=1)])
        return accuracy

    def kmeans(self, boxes, k, dist=np.median):
        """Cluster (w, h) boxes into k anchors with 1-IoU distance.

        `dist` is the centroid update rule (median by default, as in the
        original YOLO recipe).
        """
        box_number = boxes.shape[0]
        distances = np.empty((box_number, k))
        last_nearest = np.zeros((box_number,))
        np.random.seed()
        clusters = boxes[np.random.choice(
            box_number, k, replace=False)]  # init k clusters
        while True:
            distances = 1 - self.iou(boxes, clusters)

            current_nearest = np.argmin(distances, axis=1)
            if (last_nearest == current_nearest).all():
                break  # clusters won't change
            for cluster in range(k):
                clusters[cluster] = dist(  # update clusters
                    boxes[current_nearest == cluster], axis=0)

            last_nearest = current_nearest

        return clusters

    def result2txt(self, data):
        """Write anchors to yolo_anchors.txt as a single comma-separated line."""
        row = np.shape(data)[0]
        with open("yolo_anchors.txt", 'w') as f:
            for i in range(row):
                if i == 0:
                    x_y = "%d,%d" % (data[i][0], data[i][1])
                else:
                    x_y = ",%d,%d" % (data[i][0], data[i][1])
                f.write(x_y)

    def txt2boxes(self):
        """Parse the annotation file into an (N, 2) array of box (w, h).

        Each line is "image_path xmin,ymin,xmax,ymax,cls ...".
        """
        dataSet = []
        with open(self.filename, 'r') as f:
            for line in f:
                # BUG FIX: the original called line.split("") which raises
                # ValueError; fields are separated by a single space.
                infos = line.split(" ")
                length = len(infos)
                for i in range(1, length):
                    width = int(infos[i].split(",")[2]) - \
                        int(infos[i].split(",")[0])
                    height = int(infos[i].split(",")[3]) - \
                        int(infos[i].split(",")[1])
                    dataSet.append([width, height])
        result = np.array(dataSet)
        return result

    def txt2clusters(self):
        """Full pipeline: read boxes, cluster, sort by width, save and report."""
        all_boxes = self.txt2boxes()
        result = self.kmeans(all_boxes, k=self.cluster_number)
        result = result[np.lexsort(result.T[0, None])]
        self.result2txt(result)
        print("K anchors:\n{}".format(result))
        print("Accuracy: {:.2f}%".format(
            self.avg_iou(all_boxes, result) * 100))


if __name__ == "__main__":
    # 9 anchors for full YOLOv3, 6 for tiny-YOLOv3.
    cluster_number = 9
    filename = "2012_train.txt"
    kmeans = YOLO_Kmeans(cluster_number, filename)
    kmeans.txt2clusters()

5.生成voc_annotation檔案

import xml.etree.ElementTree as ET
from os import getcwd

# sets = [('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
sets = [('2007', 'train'), ('2007', 'test')]

classes = ["human_face", "fish"]  # replace with your own class names


def convert_annotation(year, image_id, list_file):
    """Append every usable box of one VOC xml to list_file.

    Each box is written as " xmin,ymin,xmax,ymax,class_id"; boxes whose class
    is unknown or that are marked difficult are skipped.
    """
    # Context manager so the xml file handle is closed promptly.
    with open('VOCdevkit/VOC%s/Annotations/%s.xml' % (year, image_id)) as in_file:
        tree = ET.parse(in_file)
    root = tree.getroot()

    for obj in root.iter('object'):
        difficult = obj.find('difficult').text
        cls = obj.find('name').text
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (int(xmlbox.find('xmin').text), int(xmlbox.find('ymin').text),
             int(xmlbox.find('xmax').text), int(xmlbox.find('ymax').text))
        # BUG FIX: a leading space is required to separate box records from the
        # image path and from each other (the mangled source wrote "" + ...).
        list_file.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id))


wd = getcwd()

for year, image_set in sets:
    with open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt' % (year, image_set)) as f:
        image_ids = f.read().strip().split()
    with open('%s_%s.txt' % (year, image_set), 'w') as list_file:
        for image_id in image_ids:
            list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg' % (wd, year, image_id))
            convert_annotation(year, image_id, list_file)
            list_file.write('\n')

6.由於我們是在原來作者的基礎上進行fine-tune的,所以需要將.weights檔案轉化為h5檔案

#!/usr/bin/env python
"""
Reads Darknet config and weights and creates Keras model with TF backend.
"""

import argparse
import configparser
import io
import os
from collections import defaultdict

import numpy as np
from keras import backend as K
from keras.layers import (Conv2D, Input, ZeroPadding2D, Add,
                          UpSampling2D, MaxPooling2D, Concatenate)
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.regularizers import l2
from keras.utils.vis_utils import plot_model as plot


# Command-line interface: positional cfg/weights/output paths plus two flags.
parser = argparse.ArgumentParser(description='Darknet To Keras Converter.')
parser.add_argument('config_path', help='Path to Darknet cfg file.')
parser.add_argument('weights_path', help='Path to Darknet weights file.')
parser.add_argument('output_path', help='Path to output Keras model file.')
parser.add_argument(
    '-p',
    '--plot_model',
    help='Plot generated Keras model and save as image.',
    action='store_true')
parser.add_argument(
    '-w',
    '--weights_only',
    help='Save as Keras weights file instead of model file.',
    action='store_true')

def unique_config_sections(config_file):
    """Convert all config sections to have unique names.

    Darknet cfg files repeat section names (e.g. many [convolutional]);
    configparser requires unique names, so each occurrence gets a numeric
    suffix: [convolutional] -> [convolutional_0], [convolutional_1], ...

    Returns a StringIO positioned at the start, ready for read_file().
    """
    section_counters = defaultdict(int)
    output_stream = io.StringIO()
    with open(config_file) as fin:
        for line in fin:
            if line.startswith('['):
                section = line.strip().strip('[]')
                _section = section + '_' + str(section_counters[section])
                section_counters[section] += 1
                line = line.replace(section, _section)
            output_stream.write(line)
    output_stream.seek(0)
    return output_stream

# %%
def _main(args):
    """Convert a Darknet .cfg/.weights pair into a Keras .h5 file.

    Streams the binary weights file section-by-section while rebuilding the
    network topology from the cfg, so weight order must match layer order.
    """
    config_path = os.path.expanduser(args.config_path)
    weights_path = os.path.expanduser(args.weights_path)
    assert config_path.endswith('.cfg'), '{} is not a .cfg file'.format(
        config_path)
    assert weights_path.endswith(
        '.weights'), '{} is not a .weights file'.format(weights_path)

    output_path = os.path.expanduser(args.output_path)
    assert output_path.endswith(
        '.h5'), 'output path {} is not a .h5 file'.format(output_path)
    output_root = os.path.splitext(output_path)[0]

    # Load weights and config.
    print('Loading weights.')
    weights_file = open(weights_path, 'rb')
    major, minor, revision = np.ndarray(
        shape=(3, ), dtype='int32', buffer=weights_file.read(12))
    # Header layout changed in Darknet: 'seen' is int64 from version 0.2 on.
    if (major * 10 + minor) >= 2 and major < 1000 and minor < 1000:
        seen = np.ndarray(shape=(1,), dtype='int64', buffer=weights_file.read(8))
    else:
        seen = np.ndarray(shape=(1,), dtype='int32', buffer=weights_file.read(4))
    print('Weights Header: ', major, minor, revision, seen)

    print('Parsing Darknet config.')
    unique_config_file = unique_config_sections(config_path)
    cfg_parser = configparser.ConfigParser()
    cfg_parser.read_file(unique_config_file)

    print('Creating Keras model.')
    input_layer = Input(shape=(None, None, 3))
    prev_layer = input_layer
    all_layers = []

    weight_decay = float(cfg_parser['net_0']['decay']
                         ) if 'net_0' in cfg_parser.sections() else 5e-4
    count = 0          # running total of float32 weights consumed
    out_index = []     # indices of the model's output layers ([yolo] heads)
    for section in cfg_parser.sections():
        print('Parsing section {}'.format(section))
        if section.startswith('convolutional'):
            filters = int(cfg_parser[section]['filters'])
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            pad = int(cfg_parser[section]['pad'])
            activation = cfg_parser[section]['activation']
            batch_normalize = 'batch_normalize' in cfg_parser[section]

            padding = 'same' if pad == 1 and stride == 1 else 'valid'

            # Setting weights.
            # Darknet serializes convolutional weights as:
            # [bias/beta, [gamma, mean, variance], conv_weights]
            prev_layer_shape = K.int_shape(prev_layer)

            weights_shape = (size, size, prev_layer_shape[-1], filters)
            darknet_w_shape = (filters, weights_shape[2], size, size)
            weights_size = np.product(weights_shape)

            print('conv2d', 'bn'
                  if batch_normalize else '  ', activation, weights_shape)

            conv_bias = np.ndarray(
                shape=(filters, ),
                dtype='float32',
                buffer=weights_file.read(filters * 4))
            count += filters

            if batch_normalize:
                bn_weights = np.ndarray(
                    shape=(3, filters),
                    dtype='float32',
                    buffer=weights_file.read(filters * 12))
                count += 3 * filters

                bn_weight_list = [
                    bn_weights[0],  # scale gamma
                    conv_bias,      # shift beta
                    bn_weights[1],  # running mean
                    bn_weights[2]   # running var
                ]

            conv_weights = np.ndarray(
                shape=darknet_w_shape,
                dtype='float32',
                buffer=weights_file.read(weights_size * 4))
            count += weights_size

            # DarkNet conv_weights are serialized Caffe-style:
            # (out_dim, in_dim, height, width)
            # We would like to set these to Tensorflow order:
            # (height, width, in_dim, out_dim)
            conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])
            conv_weights = [conv_weights] if batch_normalize else [
                conv_weights, conv_bias
            ]

            # Handle activation.
            act_fn = None
            if activation == 'leaky':
                pass  # Add advanced activation later.
            elif activation != 'linear':
                raise ValueError(
                    'Unknown activation function `{}` in section {}'.format(
                        activation, section))

            # Create Conv2D layer
            if stride > 1:
                # Darknet uses left and top padding instead of 'same' mode
                prev_layer = ZeroPadding2D(((1, 0), (1, 0)))(prev_layer)
            conv_layer = (Conv2D(
                filters, (size, size),
                strides=(stride, stride),
                kernel_regularizer=l2(weight_decay),
                use_bias=not batch_normalize,
                weights=conv_weights,
                activation=act_fn,
                padding=padding))(prev_layer)

            if batch_normalize:
                conv_layer = (BatchNormalization(
                    weights=bn_weight_list))(conv_layer)
            prev_layer = conv_layer

            if activation == 'linear':
                all_layers.append(prev_layer)
            elif activation == 'leaky':
                act_layer = LeakyReLU(alpha=0.1)(prev_layer)
                prev_layer = act_layer
                all_layers.append(act_layer)

        elif section.startswith('route'):
            ids = [int(i) for i in cfg_parser[section]['layers'].split(',')]
            layers = [all_layers[i] for i in ids]
            if len(layers) > 1:
                print('Concatenating route layers:', layers)
                concatenate_layer = Concatenate()(layers)
                all_layers.append(concatenate_layer)
                prev_layer = concatenate_layer
            else:
                skip_layer = layers[0]  # only one layer to route
                all_layers.append(skip_layer)
                prev_layer = skip_layer

        elif section.startswith('maxpool'):
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            all_layers.append(
                MaxPooling2D(
                    pool_size=(size, size),
                    strides=(stride, stride),
                    padding='same')(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('shortcut'):
            index = int(cfg_parser[section]['from'])
            activation = cfg_parser[section]['activation']
            assert activation == 'linear', 'Only linear activation supported.'
            all_layers.append(Add()([all_layers[index], prev_layer]))
            prev_layer = all_layers[-1]

        elif section.startswith('upsample'):
            stride = int(cfg_parser[section]['stride'])
            assert stride == 2, 'Only stride=2 supported.'
            all_layers.append(UpSampling2D(stride)(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('yolo'):
            out_index.append(len(all_layers) - 1)
            all_layers.append(None)
            prev_layer = all_layers[-1]

        elif section.startswith('net'):
            pass

        else:
            raise ValueError(
                'Unsupported section header type: {}'.format(section))

    # Create and save model.
    if len(out_index) == 0:
        out_index.append(len(all_layers) - 1)
    model = Model(inputs=input_layer,
                  outputs=[all_layers[i] for i in out_index])
    print(model.summary())
    if args.weights_only:
        model.save_weights('{}'.format(output_path))
        print('Saved Keras weights to {}'.format(output_path))
    else:
        model.save('{}'.format(output_path))
        print('Saved Keras model to {}'.format(output_path))

    # Check to see if all weights have been read.
    remaining_weights = len(weights_file.read()) / 4
    weights_file.close()
    print('Read {} of {} from Darknet weights.'.format(count, count +
                                                       remaining_weights))
    if remaining_weights > 0:
        print('Warning: {} unused weights'.format(remaining_weights))

    if args.plot_model:
        plot(model, to_file='{}.png'.format(output_root), show_shapes=True)
        print('Saved model plot to {}.png'.format(output_root))


if __name__ == '__main__':
    _main(parser.parse_args())

模型訓練

1.建立yolo模型框架model.py

"""YOLO_v3ModelDefinedinKeras."""

fromfunctoolsimportwraps

importnumpyasnp
importtensorflowastf
fromkerasimportbackendasK
fromkeras.layersimportConv2D,Add,ZeroPadding2D,UpSampling2D,Concatenate,MaxPooling2D
fromkeras.layers.advanced_activationsimportLeakyReLU
fromkeras.layers.normalizationimportBatchNormalization
fromkeras.modelsimportModel
fromkeras.regularizersimportl2

fromyolo3.utilsimportcompose


@wraps(Conv2D)
def DarknetConv2D(*args, **kwargs):
    """Wrapper to set Darknet parameters for Convolution2D.

    Applies YOLO's default L2 weight decay and Darknet's padding convention:
    'valid' for stride-2 downsampling convolutions, 'same' otherwise.
    """
    darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)}
    darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides') == (2, 2) else 'same'
    darknet_conv_kwargs.update(kwargs)
    return Conv2D(*args, **darknet_conv_kwargs)

def DarknetConv2D_BN_Leaky(*args, **kwargs):
    """Darknet Convolution2D followed by BatchNormalization and LeakyReLU.

    use_bias is disabled because BatchNormalization provides the shift term.
    """
    no_bias_kwargs = {'use_bias': False}
    no_bias_kwargs.update(kwargs)
    return compose(
        DarknetConv2D(*args, **no_bias_kwargs),
        BatchNormalization(),
        LeakyReLU(alpha=0.1))

def resblock_body(x, num_filters, num_blocks):
    '''A series of resblocks starting with a downsampling Convolution2D.'''
    # Darknet uses left and top padding instead of 'same' mode
    x = ZeroPadding2D(((1, 0), (1, 0)))(x)
    x = DarknetConv2D_BN_Leaky(num_filters, (3, 3), strides=(2, 2))(x)
    for i in range(num_blocks):
        # Bottleneck residual: 1x1 halving channels, then 3x3 restoring them.
        y = compose(
            DarknetConv2D_BN_Leaky(num_filters // 2, (1, 1)),
            DarknetConv2D_BN_Leaky(num_filters, (3, 3)))(x)
        x = Add()([x, y])
    return x

def darknet_body(x):
    '''Darknet body having 52 Convolution2D layers (Darknet-53 backbone).'''
    x = DarknetConv2D_BN_Leaky(32, (3, 3))(x)
    x = resblock_body(x, 64, 1)
    x = resblock_body(x, 128, 2)
    x = resblock_body(x, 256, 8)
    x = resblock_body(x, 512, 8)
    x = resblock_body(x, 1024, 4)
    return x

def make_last_layers(x, num_filters, out_filters):
    '''5 Conv2D_BN_Leaky layers followed by a Conv2D_BN_Leaky + linear Conv2D.

    Returns (x, y): x feeds the next (upsampled) detection branch,
    y is this scale's raw prediction tensor with out_filters channels.
    '''
    x = compose(
        DarknetConv2D_BN_Leaky(num_filters, (1, 1)),
        DarknetConv2D_BN_Leaky(num_filters * 2, (3, 3)),
        DarknetConv2D_BN_Leaky(num_filters, (1, 1)),
        DarknetConv2D_BN_Leaky(num_filters * 2, (3, 3)),
        DarknetConv2D_BN_Leaky(num_filters, (1, 1)))(x)
    y = compose(
        DarknetConv2D_BN_Leaky(num_filters * 2, (3, 3)),
        DarknetConv2D(out_filters, (1, 1)))(x)
    return x, y


def yolo_body(inputs, num_anchors, num_classes):
    """Create YOLO_V3 model CNN body in Keras.

    Three detection heads at strides 32/16/8, each predicting
    num_anchors*(num_classes+5) channels.
    """
    darknet = Model(inputs, darknet_body(inputs))
    x, y1 = make_last_layers(darknet.output, 512, num_anchors * (num_classes + 5))

    x = compose(
        DarknetConv2D_BN_Leaky(256, (1, 1)),
        UpSampling2D(2))(x)
    # Skip connection from the end of the 512-filter resblock stage.
    x = Concatenate()([x, darknet.layers[152].output])
    x, y2 = make_last_layers(x, 256, num_anchors * (num_classes + 5))

    x = compose(
        DarknetConv2D_BN_Leaky(128, (1, 1)),
        UpSampling2D(2))(x)
    # Skip connection from the end of the 256-filter resblock stage.
    x = Concatenate()([x, darknet.layers[92].output])
    x, y3 = make_last_layers(x, 128, num_anchors * (num_classes + 5))

    return Model(inputs, [y1, y2, y3])

def tiny_yolo_body(inputs, num_anchors, num_classes):
    '''Create Tiny YOLO_v3 model CNN body in keras (two detection heads).'''
    x1 = compose(
        DarknetConv2D_BN_Leaky(16, (3, 3)),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
        DarknetConv2D_BN_Leaky(32, (3, 3)),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
        DarknetConv2D_BN_Leaky(64, (3, 3)),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
        DarknetConv2D_BN_Leaky(128, (3, 3)),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
        DarknetConv2D_BN_Leaky(256, (3, 3)))(inputs)
    x2 = compose(
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
        DarknetConv2D_BN_Leaky(512, (3, 3)),
        # stride (1,1): keeps spatial size, only widens the receptive field.
        MaxPooling2D(pool_size=(2, 2), strides=(1, 1), padding='same'),
        DarknetConv2D_BN_Leaky(1024, (3, 3)),
        DarknetConv2D_BN_Leaky(256, (1, 1)))(x1)
    y1 = compose(
        DarknetConv2D_BN_Leaky(512, (3, 3)),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x2)

    x2 = compose(
        DarknetConv2D_BN_Leaky(128, (1, 1)),
        UpSampling2D(2))(x2)
    y2 = compose(
        Concatenate(),
        DarknetConv2D_BN_Leaky(256, (3, 3)),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))([x2, x1])

    return Model(inputs, [y1, y2])


def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters."""
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                    [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                    [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    # [::-1] flips (h, w) -> (w, h) to match the (x, y) channel order.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs


def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
    '''Get corrected boxes: undo letterbox padding/scale, return absolute
    (y_min, x_min, y_max, x_max) in original image pixels.'''
    box_yx = box_xy[..., ::-1]
    box_hw = box_wh[..., ::-1]
    input_shape = K.cast(input_shape, K.dtype(box_yx))
    image_shape = K.cast(image_shape, K.dtype(box_yx))
    new_shape = K.round(image_shape * K.min(input_shape / image_shape))
    offset = (input_shape - new_shape) / 2. / input_shape
    scale = input_shape / new_shape
    box_yx = (box_yx - offset) * scale
    box_hw *= scale

    box_mins = box_yx - (box_hw / 2.)
    box_maxes = box_yx + (box_hw / 2.)
    boxes = K.concatenate([
        box_mins[..., 0:1],   # y_min
        box_mins[..., 1:2],   # x_min
        box_maxes[..., 0:1],  # y_max
        box_maxes[..., 1:2]   # x_max
    ])

    # Scale boxes back to original image shape.
    boxes *= K.concatenate([image_shape, image_shape])
    return boxes


def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape):
    '''Process one conv-layer output into flat boxes and per-class scores.'''
    box_xy, box_wh, box_confidence, box_class_probs = yolo_head(
        feats, anchors, num_classes, input_shape)
    boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape)
    boxes = K.reshape(boxes, [-1, 4])
    box_scores = box_confidence * box_class_probs
    box_scores = K.reshape(box_scores, [-1, num_classes])
    return boxes, box_scores


def yolo_eval(yolo_outputs,
              anchors,
              num_classes,
              image_shape,
              max_boxes=20,
              score_threshold=.6,
              iou_threshold=.5):
    """Evaluate YOLO model on given input and return filtered boxes.

    Applies the score threshold and per-class non-max suppression; returns
    (boxes_, scores_, classes_) tensors for the surviving detections.
    """
    num_layers = len(yolo_outputs)
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]  # default setting
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32
    boxes = []
    box_scores = []
    for l in range(num_layers):
        _boxes, _box_scores = yolo_boxes_and_scores(
            yolo_outputs[l], anchors[anchor_mask[l]], num_classes,
            input_shape, image_shape)
        boxes.append(_boxes)
        box_scores.append(_box_scores)
    boxes = K.concatenate(boxes, axis=0)
    box_scores = K.concatenate(box_scores, axis=0)

    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
    boxes_ = []
    scores_ = []
    classes_ = []
    for c in range(num_classes):
        # TODO: use keras backend instead of tf.
        class_boxes = tf.boolean_mask(boxes, mask[:, c])
        class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
        nms_index = tf.image.non_max_suppression(
            class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold)
        class_boxes = K.gather(class_boxes, nms_index)
        class_box_scores = K.gather(class_box_scores, nms_index)
        classes = K.ones_like(class_box_scores, 'int32') * c
        boxes_.append(class_boxes)
        scores_.append(class_box_scores)
        classes_.append(classes)
    boxes_ = K.concatenate(boxes_, axis=0)
    scores_ = K.concatenate(scores_, axis=0)
    classes_ = K.concatenate(classes_, axis=0)

    return boxes_, scores_, classes_


def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
    '''Preprocess true boxes to training input format.

    Parameters
    ----------
    true_boxes: array, shape=(m, T, 5)
        Absolute x_min, y_min, x_max, y_max, class_id relative to input_shape.
    input_shape: array-like, hw, multiples of 32
    anchors: array, shape=(N, 2), wh
    num_classes: integer

    Returns
    -------
    y_true: list of array, shape like yolo_outputs, xywh are relative value
    '''
    # NOTE: the mangled source lost the '<' here; this is the original assert.
    assert (true_boxes[..., 4] < num_classes).all(), 'class id must be less than num_classes'
    num_layers = len(anchors) // 3  # default setting
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]

    true_boxes = np.array(true_boxes, dtype='float32')
    input_shape = np.array(input_shape, dtype='int32')
    boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2
    boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]
    # Normalize to [0, 1]; [::-1] flips (h, w) -> (w, h) to match (x, y).
    true_boxes[..., 0:2] = boxes_xy / input_shape[::-1]
    true_boxes[..., 2:4] = boxes_wh / input_shape[::-1]

    m = true_boxes.shape[0]
    grid_shapes = [input_shape // {0: 32, 1: 16, 2: 8}[l] for l in range(num_layers)]
    y_true = [np.zeros((m, grid_shapes[l][0], grid_shapes[l][1], len(anchor_mask[l]), 5 + num_classes),
                       dtype='float32') for l in range(num_layers)]

    # Expand dim to apply broadcasting.
    anchors = np.expand_dims(anchors, 0)
    anchor_maxes = anchors / 2.
    anchor_mins = -anchor_maxes
    valid_mask = boxes_wh[..., 0] > 0

    for b in range(m):
        # Discard zero rows.
        wh = boxes_wh[b, valid_mask[b]]
        if len(wh) == 0:
            continue
        # Expand dim to apply broadcasting.
        wh = np.expand_dims(wh, -2)
        box_maxes = wh / 2.
        box_mins = -box_maxes

        intersect_mins = np.maximum(box_mins, anchor_mins)
        intersect_maxes = np.minimum(box_maxes, anchor_maxes)
        intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
        box_area = wh[..., 0] * wh[..., 1]
        anchor_area = anchors[..., 0] * anchors[..., 1]
        iou = intersect_area / (box_area + anchor_area - intersect_area)

        # Find best anchor for each true box.
        best_anchor = np.argmax(iou, axis=-1)

        for t, n in enumerate(best_anchor):
            for l in range(num_layers):
                if n in anchor_mask[l]:
                    i = np.floor(true_boxes[b, t, 0] * grid_shapes[l][1]).astype('int32')
                    j = np.floor(true_boxes[b, t, 1] * grid_shapes[l][0]).astype('int32')
                    k = anchor_mask[l].index(n)
                    c = true_boxes[b, t, 4].astype('int32')
                    y_true[l][b, j, i, k, 0:4] = true_boxes[b, t, 0:4]
                    y_true[l][b, j, i, k, 4] = 1
                    y_true[l][b, j, i, k, 5 + c] = 1

    return y_true


def box_iou(b1, b2):
    '''Return iou tensor.

    Parameters
    ----------
    b1: tensor, shape=(i1,...,iN, 4), xywh
    b2: tensor, shape=(j, 4), xywh

    Returns
    -------
    iou: tensor, shape=(i1,...,iN, j)
    '''
    # Expand dim to apply broadcasting.
    b1 = K.expand_dims(b1, -2)
    b1_xy = b1[..., :2]
    b1_wh = b1[..., 2:4]
    b1_wh_half = b1_wh / 2.
    b1_mins = b1_xy - b1_wh_half
    b1_maxes = b1_xy + b1_wh_half

    # Expand dim to apply broadcasting.
    b2 = K.expand_dims(b2, 0)
    b2_xy = b2[..., :2]
    b2_wh = b2[..., 2:4]
    b2_wh_half = b2_wh / 2.
    b2_mins = b2_xy - b2_wh_half
    b2_maxes = b2_xy + b2_wh_half

    intersect_mins = K.maximum(b1_mins, b2_mins)
    intersect_maxes = K.minimum(b1_maxes, b2_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
    b1_area = b1_wh[..., 0] * b1_wh[..., 1]
    b2_area = b2_wh[..., 0] * b2_wh[..., 1]
    iou = intersect_area / (b1_area + b2_area - intersect_area)

    return iou


def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor.

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)
    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            yolo_outputs[l], anchors[anchor_mask[l]], num_classes,
            input_shape, calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            # NOTE: the '<' comparison was eaten by HTML escaping in the
            # mangled source; restored from the reference implementation.
            ignore_mask = ignore_mask.write(b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(
            lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
            (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [loss, xy_loss, wh_loss, confidence_loss,
                                   class_loss, K.sum(ignore_mask)], message='loss: ')
    return loss

2.utils.py

"""Miscellaneousutilityfunctions."""

fromfunctoolsimportreduce

fromPILimportImage
importnumpyasnp
frommatplotlib.colorsimportrgb_to_hsv,hsv_to_rgb

def compose(*funcs):
    """Compose arbitrarily many functions, evaluated left to right.

    Reference: https://mathieularose.com/function-composition-in-python/

    Raises ValueError when called with no functions.
    """
    # return lambda x: reduce(lambda v, f: f(v), funcs, x)
    if funcs:
        return reduce(lambda f, g: lambda *a, **kw: g(f(*a, **kw)), funcs)
    else:
        raise ValueError('Composition of empty sequence not supported.')

def letterbox_image(image, size):
    '''Resize image with unchanged aspect ratio using padding.

    The image is scaled to fit inside `size` (w, h) and centered on a
    gray (128, 128, 128) canvas.
    '''
    iw, ih = image.size
    w, h = size
    scale = min(w / iw, h / ih)
    nw = int(iw * scale)
    nh = int(ih * scale)

    image = image.resize((nw, nh), Image.BICUBIC)
    new_image = Image.new('RGB', size, (128, 128, 128))
    new_image.paste(image, ((w - nw) // 2, (h - nh) // 2))
    return new_image

def rand(a=0, b=1):
    """Return a uniform random float in [a, b)."""
    return np.random.rand() * (b - a) + a

def get_random_data(annotation_line, input_shape, random=True, max_boxes=20, jitter=.3, hue=.1, sat=1.5, val=1.5, proc_img=True):
    '''Random preprocessing for real-time data augmentation.

    annotation_line: "image_path xmin,ymin,xmax,ymax,cls ...".
    Returns (image_data, box_data): the image as a [0, 1] float array and a
    (max_boxes, 5) array of adjusted boxes (zero-padded).
    '''
    line = annotation_line.split()
    image = Image.open(line[0])
    iw, ih = image.size
    h, w = input_shape
    box = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]])

    if not random:
        # resize image (deterministic letterbox, used for validation)
        scale = min(w / iw, h / ih)
        nw = int(iw * scale)
        nh = int(ih * scale)
        dx = (w - nw) // 2
        dy = (h - nh) // 2
        image_data = 0
        if proc_img:
            image = image.resize((nw, nh), Image.BICUBIC)
            new_image = Image.new('RGB', (w, h), (128, 128, 128))
            new_image.paste(image, (dx, dy))
            image_data = np.array(new_image) / 255.

        # correct boxes
        box_data = np.zeros((max_boxes, 5))
        if len(box) > 0:
            np.random.shuffle(box)
            if len(box) > max_boxes:
                box = box[:max_boxes]
            box[:, [0, 2]] = box[:, [0, 2]] * scale + dx
            box[:, [1, 3]] = box[:, [1, 3]] * scale + dy
            box_data[:len(box)] = box

        return image_data, box_data

    # resize image with random aspect-ratio jitter and scale
    new_ar = w / h * rand(1 - jitter, 1 + jitter) / rand(1 - jitter, 1 + jitter)
    scale = rand(.25, 2)
    # NOTE: the '<' was eaten by HTML escaping in the mangled source.
    if new_ar < 1:
        nh = int(scale * h)
        nw = int(nh * new_ar)
    else:
        nw = int(scale * w)
        nh = int(nw / new_ar)
    image = image.resize((nw, nh), Image.BICUBIC)

    # place image at a random offset on a gray canvas
    dx = int(rand(0, w - nw))
    dy = int(rand(0, h - nh))
    new_image = Image.new('RGB', (w, h), (128, 128, 128))
    new_image.paste(image, (dx, dy))
    image = new_image

    # flip image or not
    flip = rand() < .5
    if flip:
        image = image.transpose(Image.FLIP_LEFT_RIGHT)

    # distort image in HSV space
    hue = rand(-hue, hue)
    sat = rand(1, sat) if rand() < .5 else 1 / rand(1, sat)
    val = rand(1, val) if rand() < .5 else 1 / rand(1, val)
    x = rgb_to_hsv(np.array(image) / 255.)
    x[..., 0] += hue
    x[..., 0][x[..., 0] > 1] -= 1
    x[..., 0][x[..., 0] < 0] += 1
    x[..., 1] *= sat
    x[..., 2] *= val
    x[x > 1] = 1
    x[x < 0] = 0
    image_data = hsv_to_rgb(x)  # numpy array, 0 to 1

    # correct boxes to the transformed image coordinates
    box_data = np.zeros((max_boxes, 5))
    if len(box) > 0:
        np.random.shuffle(box)
        box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
        box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
        if flip:
            box[:, [0, 2]] = w - box[:, [2, 0]]
        box[:, 0:2][box[:, 0:2] < 0] = 0
        box[:, 2][box[:, 2] > w] = w
        box[:, 3][box[:, 3] > h] = h
        box_w = box[:, 2] - box[:, 0]
        box_h = box[:, 3] - box[:, 1]
        box = box[np.logical_and(box_w > 1, box_h > 1)]  # discard invalid box
        if len(box) > max_boxes:
            box = box[:max_boxes]
        box_data[:len(box)] = box

    return image_data, box_data

3.yolo.py

# -*- coding: utf-8 -*-
"""
Class definition of YOLO_v3 style detection model on image and video
"""

import colorsys
import os
from timeit import default_timer as timer

import numpy as np
from keras import backend as K
from keras.models import load_model
from keras.layers import Input
from PIL import Image, ImageFont, ImageDraw

from yolo3.model import yolo_eval, yolo_body, tiny_yolo_body
from yolo3.utils import letterbox_image
from keras.utils import multi_gpu_model

classYOLO(object):
_defaults={
"model_path":'logs/000/trained_weights_final.h5',#儲存模型路徑
"anchors_path":'model_data/yolo_anchors.txt',
"classes_path":'model_data/voc_classes.txt',
"score":0.3,
"iou":0.45,
"model_image_size":(416,416),#一般設定
"gpu_num":1,#根據自己的設定
}

@classmethod
defget_defaults(cls,n):
ifnincls._defaults:
returncls._defaults[n]
else:
return"Unrecognizedattributename'"+n+"'"

def__init__(self,**kwargs):
self.__dict__.update(self._defaults)#setupdefaultvalues
self.__dict__.update(kwargs)#andupdatewithuseroverrides
self.class_names=self._get_class()
self.anchors=self._get_anchors()
self.sess=K.get_session()
self.boxes,self.scores,self.classes=self.generate()

def_get_class(self):
classes_path=os.path.expanduser(self.classes_path)
withopen(classes_path)asf:
class_names=f.readlines()
class_names=[c.strip()forcinclass_names]
returnclass_names

def_get_anchors(self):
anchors_path=os.path.expanduser(self.anchors_path)
withopen(anchors_path)asf:
anchors=f.readline()
anchors=[float(x)forxinanchors.split(',')]
returnnp.array(anchors).reshape(-1,2)

defgenerate(self):
model_path=os.path.expanduser(self.model_path)
assertmodel_path.endswith('.h5'),'Kerasmodelorweightsmustbea.h5file.'

#Loadmodel,orconstructmodelandloadweights.
num_anchors=len(self.anchors)
num_classes=len(self.class_names)
is_tiny_version=num_anchors==6#defaultsetting
try:
self.yolo_model=load_model(model_path,compile=False)
except:
self.yolo_model=tiny_yolo_body(Input(shape=(None,None,3)),num_anchors//2,num_classes)\
ifis_tiny_versionelseyolo_body(Input(shape=(None,None,3)),num_anchors//3,num_classes)
self.yolo_model.load_weights(self.model_path)#makesuremodel,anchorsandclassesmatch
else:
assertself.yolo_model.layers[-1].output_shape[-1]==\
num_anchors/len(self.yolo_model.output)*(num_classes+5),\
'Mismatchbetweenmodelandgivenanchorandclasssizes'

print('{}model,anchors,andclassesloaded.'.format(model_path))

#Generatecolorsfordrawingboundingboxes.
hsv_tuples=[(x/len(self.class_names),1.,1.)
forxinrange(len(self.class_names))]
self.colors=list(map(lambdax:colorsys.hsv_to_rgb(*x),hsv_tuples))
self.colors=list(
map(lambdax:(int(x[0]*255),int(x[1]*255),int(x[2]*255)),
self.colors))
np.random.seed(10101)#Fixedseedforconsistentcolorsacrossruns.
np.random.shuffle(self.colors)#Shufflecolorstodecorrelateadjacentclasses.
np.random.seed(None)#Resetseedtodefault.

#Generateoutputtensortargetsforfilteredboundingboxes.
self.input_image_shape=K.placeholder(shape=(2,))
ifself.gpu_num>=2:
self.yolo_model=multi_gpu_model(self.yolo_model,gpus=self.gpu_num)
boxes,scores,classes=yolo_eval(self.yolo_model.output,self.anchors,
len(self.class_names),self.input_image_shape,
score_threshold=self.score,iou_threshold=self.iou)
returnboxes,scores,classes

def detect_image(self, image):
    """Run YOLO detection on a single PIL image and draw the results on it.

    Args:
        image: a PIL.Image instance (RGB assumed — TODO confirm against caller).

    Returns:
        (image, output): the annotated image and the raw session output as
        a list [out_boxes, out_scores, out_classes].
    """
    start = timer()

    if self.model_image_size != (None, None):
        # The network input must be a multiple of 32 in both dimensions.
        assert self.model_image_size[0] % 32 == 0, 'Multiples of 32 required'
        assert self.model_image_size[1] % 32 == 0, 'Multiples of 32 required'
        boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size)))
    else:
        # Round the image size down to the nearest multiple of 32.
        new_image_size = (image.width - (image.width % 32),
                          image.height - (image.height % 32))
        boxed_image = letterbox_image(image, new_image_size)
    image_data = np.array(boxed_image, dtype='float32')

    print(image_data.shape)
    image_data /= 255.  # normalize pixels to [0, 1]
    image_data = np.expand_dims(image_data, 0)  # add batch dimension

    out_boxes, out_scores, out_classes = self.sess.run(
        [self.boxes, self.scores, self.classes],
        feed_dict={
            self.yolo_model.input: image_data,
            self.input_image_shape: [image.size[1], image.size[0]],
            K.learning_phase(): 0
        })
    output = [out_boxes, out_scores, out_classes]
    print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

    font = ImageFont.truetype(font='font/FiraMono-Medium.otf',
                              size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
    thickness = (image.size[0] + image.size[1]) // 100

    # Create the drawing context once instead of once per detected box.
    draw = ImageDraw.Draw(image)
    for i, c in reversed(list(enumerate(out_classes))):
        predicted_class = self.class_names[c]
        box = out_boxes[i]
        score = out_scores[i]

        label = '{} {:.2f}'.format(predicted_class, score)
        label_size = draw.textsize(label, font)

        top, left, bottom, right = box
        # Clamp the box to the image bounds, rounding to nearest integer.
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
        right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
        print(label, (left, top), (right, bottom))

        # Put the label above the box when there is room, else just inside it.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # My kingdom for a good redistributable image drawing library.
        # Draw `thickness` nested rectangles to fake a thick outline; use a
        # dedicated loop variable so the enumerate index `i` is not clobbered.
        for t in range(thickness):
            draw.rectangle(
                [left + t, top + t, right - t, bottom - t],
                outline=self.colors[c])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=self.colors[c])
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)
    del draw

    end = timer()
    print(end - start)
    return image, output

def close_session(self):
    """Release the underlying TensorFlow session held by this detector."""
    self.sess.close()

def detect_video(yolo, video_path, output_path=""):
    """Run YOLO detection frame by frame over a video file.

    Args:
        yolo: a YOLO instance; its detect_image() is called per frame.
        video_path: path of the input video (or camera URI cv2 understands).
        output_path: optional path to write the annotated video to; when
            empty, no output file is produced.
    """
    import cv2
    vid = cv2.VideoCapture(video_path)
    if not vid.isOpened():
        raise IOError("Couldn't open webcam or video")
    video_FourCC = int(vid.get(cv2.CAP_PROP_FOURCC))
    video_fps = vid.get(cv2.CAP_PROP_FPS)
    video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
                  int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    isOutput = output_path != ""
    out = None
    if isOutput:
        print("!!! TYPE:", type(output_path), type(video_FourCC), type(video_fps), type(video_size))
        out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size)
    accum_time = 0
    curr_fps = 0
    fps = "FPS: ??"
    prev_time = timer()
    try:
        while True:
            return_value, frame = vid.read()
            if not return_value:
                # End of stream (or read failure): stop instead of crashing
                # on Image.fromarray(None).
                break
            image = Image.fromarray(frame)
            image, output = yolo.detect_image(image)
            result = np.asarray(image)
            # Rolling one-second FPS counter.
            curr_time = timer()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            accum_time = accum_time + exec_time
            curr_fps = curr_fps + 1
            if accum_time > 1:
                accum_time = accum_time - 1
                fps = "FPS: " + str(curr_fps)
                curr_fps = 0
            cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=0.50, color=(255, 0, 0), thickness=2)
            cv2.namedWindow("result", cv2.WINDOW_NORMAL)
            cv2.imshow("result", result)
            if isOutput:
                out.write(result)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always release capture/writer and close windows, even on error.
        vid.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
        yolo.close_session()


def detect_camera(yolo, camera_id):
    """Run real-time YOLO detection on a local camera.

    Args:
        yolo: a YOLO instance; its detect_image() is called per frame.
        camera_id: camera index for cv2.VideoCapture. (The original body
            hard-coded 0 and silently ignored this parameter.)
    """
    import cv2
    cap = cv2.VideoCapture(camera_id)  # fix: honour camera_id instead of hard-coded 0
    cap.set(3, 640)   # CAP_PROP_FRAME_WIDTH
    cap.set(4, 480)   # CAP_PROP_FRAME_HEIGHT
    if cap.isOpened():
        print('camera open successfully!')
    else:
        raise IOError("Couldn't open camera!")
    accum_time = 0
    curr_fps = 0
    fps = "FPS: ??"
    prev_time = timer()
    try:
        while True:
            return_value, frame = cap.read()
            if not return_value:
                # Camera unplugged or read failure: stop cleanly.
                break
            image = Image.fromarray(frame)
            image, output = yolo.detect_image(image)
            result = np.asarray(image)
            # Rolling one-second FPS counter.
            curr_time = timer()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            accum_time = accum_time + exec_time
            curr_fps = curr_fps + 1
            if accum_time > 1:
                accum_time = accum_time - 1
                fps = "FPS: " + str(curr_fps)
                curr_fps = 0
            cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=0.50, color=(255, 0, 0), thickness=2)
            cv2.namedWindow("result", cv2.WINDOW_NORMAL)
            cv2.imshow("result", result)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always release the camera and close windows, even on error.
        cap.release()
        cv2.destroyAllWindows()
        yolo.close_session()

5. 訓練檔案 train.py

"""
RetraintheYOLOmodelforyourowndataset.
"""

importnumpyasnp
importkeras.backendasK
fromkeras.layersimportInput,Lambda
fromkeras.modelsimportModel
fromkeras.optimizersimportAdam
fromkeras.callbacksimportTensorBoard,ModelCheckpoint,ReduceLROnPlateau,EarlyStopping

fromyolo3.modelimportpreprocess_true_boxes,yolo_body,tiny_yolo_body,yolo_loss
fromyolo3.utilsimportget_random_data


def _main():
    """Train YOLOv3 (or tiny-YOLOv3) on the dataset in the annotation file.

    Two stages: first the pretrained body is frozen and only the new layers
    are trained; then everything is unfrozen and fine-tuned at a lower
    learning rate with LR-reduction and early stopping.
    """
    annotation_path = '2007_train.txt'
    log_dir = 'logs/000/'
    classes_path = 'model_data/voc_classes.txt'
    anchors_path = 'model_data/yolo_anchors.txt'
    class_names = get_classes(classes_path)
    num_classes = len(class_names)
    anchors = get_anchors(anchors_path)

    input_shape = (416, 416)  # multiple of 32, (h, w)

    is_tiny_version = len(anchors) == 6  # tiny-YOLO uses 6 anchors, full uses 9
    if is_tiny_version:
        model = create_tiny_model(input_shape, anchors, num_classes,
            freeze_body=2, weights_path='model_data/yolo-tiny.h5')
    else:
        model = create_model(input_shape, anchors, num_classes,
            freeze_body=2, weights_path='model_data/yolo.h5')  # make sure you know what you freeze

    logging = TensorBoard(log_dir=log_dir)
    checkpoint = ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
        monitor='val_loss', save_weights_only=True, save_best_only=True, period=3)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1)
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1)

    val_split = 0.1
    with open(annotation_path) as f:
        lines = f.readlines()
    # Shuffle with a fixed seed so the train/val split is reproducible,
    # then restore nondeterminism for augmentation.
    np.random.seed(10101)
    np.random.shuffle(lines)
    np.random.seed(None)
    num_val = int(len(lines) * val_split)
    num_train = len(lines) - num_val

    # The model already computes its loss inside the custom yolo_loss Lambda
    # layer, so the Keras "loss" is just the identity on y_pred.
    passthrough_loss = {'yolo_loss': lambda y_true, y_pred: y_pred}

    # Train with frozen layers first, to get a stable loss.
    # Adjust num epochs to your dataset. This step is enough to obtain a not-bad model.
    if True:
        model.compile(optimizer=Adam(lr=1e-3), loss=passthrough_loss)

        batch_size = 5
        print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
        model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes),
                steps_per_epoch=max(1, num_train // batch_size),
                validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes),
                validation_steps=max(1, num_val // batch_size),
                epochs=50,
                initial_epoch=0,
                callbacks=[logging, checkpoint])
        model.save_weights(log_dir + 'trained_weights_stage_1.h5')

    # Unfreeze and continue training, to fine-tune.
    # Train longer if the result is not good.
    if True:
        for layer in model.layers:
            layer.trainable = True
        model.compile(optimizer=Adam(lr=1e-4), loss=passthrough_loss)  # recompile to apply the change
        print('Unfreeze all of the layers.')

        batch_size = 5  # note that more GPU memory is required after unfreezing the body
        print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
        model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes),
                steps_per_epoch=max(1, num_train // batch_size),
                validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes),
                validation_steps=max(1, num_val // batch_size),
                epochs=200,
                initial_epoch=50,
                callbacks=[logging, checkpoint, reduce_lr, early_stopping])
        model.save_weights(log_dir + 'trained_weights_final.h5')

    # Further training if needed.


def get_classes(classes_path):
    """Read one class name per line from *classes_path*; return them as a list."""
    with open(classes_path) as handle:
        return [line.strip() for line in handle.readlines()]

def get_anchors(anchors_path):
    """Load comma-separated anchor values from the first line of a file.

    Returns them as a float numpy array of shape (N, 2).
    """
    with open(anchors_path) as handle:
        raw = handle.readline()
    values = [float(token) for token in raw.split(',')]
    return np.array(values).reshape(-1, 2)


def create_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2,
        weights_path='model_data/yolo_weights.h5'):
    '''Create the YOLOv3 training model.

    Args:
        input_shape: (h, w) network input size, each a multiple of 32.
        anchors: (N, 2) anchor array; N must be divisible by 3.
        num_classes: number of object classes.
        load_pretrained: load weights_path (by name, skipping mismatches).
        freeze_body: 1 freezes the darknet53 body, 2 freezes all but the
            3 output layers, anything else freezes nothing.
        weights_path: path of the pretrained weight file.

    Returns:
        A Keras Model whose single output is the scalar yolo_loss.
    '''
    K.clear_session()  # get a new session
    image_input = Input(shape=(None, None, 3))
    h, w = input_shape
    num_anchors = len(anchors)

    # One ground-truth input per output scale (strides 32, 16, 8).
    y_true = [Input(shape=(h // {0: 32, 1: 16, 2: 8}[l], w // {0: 32, 1: 16, 2: 8}[l],
        num_anchors // 3, num_classes + 5)) for l in range(3)]

    model_body = yolo_body(image_input, num_anchors // 3, num_classes)
    print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))

    if load_pretrained:
        model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
        print('Load weights {}.'.format(weights_path))
        if freeze_body in [1, 2]:
            # Freeze darknet53 body (1) or freeze all but 3 output layers (2).
            num = (185, len(model_body.layers) - 3)[freeze_body - 1]
            for i in range(num):
                model_body.layers[i].trainable = False
            print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers)))

    # The loss is computed inside the graph by a Lambda layer so that Keras
    # can train with a trivial identity loss on its output.
    model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
        arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5})(
        [*model_body.output, *y_true])
    model = Model([model_body.input, *y_true], model_loss)

    return model

def create_tiny_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2,
        weights_path='model_data/tiny_yolo_weights.h5'):
    '''Create the training model for Tiny YOLOv3.

    Args:
        input_shape: (h, w) network input size, each a multiple of 32.
        anchors: (N, 2) anchor array; N must be divisible by 2 (normally 6).
        num_classes: number of object classes.
        load_pretrained: load weights_path (by name, skipping mismatches).
        freeze_body: 1 freezes the darknet body, 2 freezes all but the
            2 output layers, anything else freezes nothing.
        weights_path: path of the pretrained weight file.

    Returns:
        A Keras Model whose single output is the scalar yolo_loss.
    '''
    K.clear_session()  # get a new session
    image_input = Input(shape=(None, None, 3))
    h, w = input_shape
    num_anchors = len(anchors)

    # One ground-truth input per output scale (strides 32, 16).
    y_true = [Input(shape=(h // {0: 32, 1: 16}[l], w // {0: 32, 1: 16}[l],
        num_anchors // 2, num_classes + 5)) for l in range(2)]

    model_body = tiny_yolo_body(image_input, num_anchors // 2, num_classes)
    print('Create Tiny YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))

    if load_pretrained:
        model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
        print('Load weights {}.'.format(weights_path))
        if freeze_body in [1, 2]:
            # Freeze the darknet body (1) or freeze all but 2 output layers (2).
            num = (20, len(model_body.layers) - 2)[freeze_body - 1]
            for i in range(num):
                model_body.layers[i].trainable = False
            print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers)))

    # The loss is computed inside the graph by a Lambda layer so that Keras
    # can train with a trivial identity loss on its output.
    model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
        arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.7})(
        [*model_body.output, *y_true])
    model = Model([model_body.input, *y_true], model_loss)

    return model

def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes):
    '''Infinite batch generator for Keras fit_generator.

    Reshuffles the annotation list at the start of every pass and yields
    ([image_batch, *y_true], dummy_zero_targets) tuples.
    '''
    total = len(annotation_lines)
    cursor = 0
    while True:
        images, boxes = [], []
        for _ in range(batch_size):
            if cursor == 0:
                # New epoch: reshuffle the sample order in place.
                np.random.shuffle(annotation_lines)
            image, box = get_random_data(annotation_lines[cursor], input_shape, random=True)
            images.append(image)
            boxes.append(box)
            cursor = (cursor + 1) % total
        image_batch = np.array(images)
        box_batch = np.array(boxes)
        y_true = preprocess_true_boxes(box_batch, input_shape, anchors, num_classes)
        yield [image_batch, *y_true], np.zeros(batch_size)

def data_generator_wrapper(annotation_lines, batch_size, input_shape, anchors, num_classes):
    '''Return the batch generator, or None when there is nothing to iterate.'''
    if not annotation_lines or batch_size <= 0:
        return None
    return data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes)

if __name__ == '__main__':
    # Script entry point: run the two-stage training procedure.
    _main()

6. 圖片測試 yolo_video.py

importsys
importargparse
fromyoloimportYOLO,detect_video,detect_camera
fromPILimportImage

def detect_img(yolo):
    """Interactively prompt for image paths and show detection results.

    An empty input ends the loop and releases the model session — without
    it, the close_session() call below is unreachable.
    """
    while True:
        img = input('Input image filename:')
        if not img.strip():
            break
        try:
            image = Image.open(img)
        except (FileNotFoundError, OSError):
            # Only catch file problems; a bare `except:` would also swallow
            # KeyboardInterrupt/SystemExit.
            print('Open Error! Try again!')
            continue
        else:
            r_image, output = yolo.detect_image(image)
            r_image.show()
    yolo.close_session()

FLAGS = None

if __name__ == '__main__':
    # class YOLO defines the default value, so suppress any default here
    parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS)
    '''
    Command line options
    '''
    parser.add_argument(
        '--model', type=str,
        help='path to model weight file, default ' + YOLO.get_defaults("model_path")
    )

    parser.add_argument(
        '--anchors', type=str,
        help='path to anchor definitions, default ' + YOLO.get_defaults("anchors_path")
    )

    parser.add_argument(
        '--classes', type=str,
        help='path to class definitions, default ' + YOLO.get_defaults("classes_path")
    )

    parser.add_argument(
        '--gpu_num', type=int,
        help='Number of GPU to use, default ' + str(YOLO.get_defaults("gpu_num"))
    )
    '''
    Command line positional arguments -- for image detection mode
    '''
    parser.add_argument(
        '--image', default=False, action="store_true",
        help='Image detection mode, will ignore all positional arguments'
    )
    '''
    Command line positional arguments -- for video detection mode
    '''
    parser.add_argument(
        "--video_input", nargs='?', type=str, default=False,
        help="Video input path"
    )

    parser.add_argument(
        "--video_output", nargs='?', type=str, default="",
        help="[Optional] Video output path"
    )
    '''
    Command line positional arguments -- for camera real-time detection mode
    '''
    parser.add_argument(
        "--camera", type=str, default='0',
        help="camera id"
    )

    FLAGS = parser.parse_args()

    # Dispatch exactly ONE mode: image > video > camera. The original used
    # independent `if` statements, so the camera branch (whose default '0'
    # is a truthy string) ran even after image/video mode, and the final
    # `else` was unreachable.
    if FLAGS.image:
        """
        Image detection mode, disregard any remaining command line arguments
        """
        print("Image detection mode")
        if FLAGS.video_input:
            # Fix: the original probed FLAGS.input/FLAGS.output, attributes
            # that were never defined by this parser.
            print("Ignoring remaining command line arguments: " + str(FLAGS.video_input) + "," + FLAGS.video_output)
        detect_img(YOLO(**vars(FLAGS)))
    elif FLAGS.video_input:
        detect_video(YOLO(**vars(FLAGS)), FLAGS.video_input, FLAGS.video_output)
    elif FLAGS.camera:
        detect_camera(YOLO(**vars(FLAGS)), int(FLAGS.camera))
    else:
        print("Must specify at least video_input_path. See usage with --help.")

7. 針對測試集（2007_test.txt）進行驗證

importos
fromyoloimportYOLO,detect_video,detect_camera
fromPILimportImage

def main():
    """Evaluate YOLO on the image list and dump ground truths + detections.

    Each line of 2007_test.txt is expected to be:
        path/to/img.jpg x1,y1,x2,y2,cls x1,y1,x2,y2,cls ...
    Files are written in the format the Object-Detection-Metrics tool reads:
    "<class> [<score>] <left> <top> <right> <bottom>", one box per line.
    """
    yolo = YOLO()
    with open('2007_test.txt', 'r') as f:
        pic = [t.strip('\n') for t in f.readlines()]

    with open('model_data\\voc_classes.txt') as f:
        class_names = [c.strip('\n') for c in f.readlines()]

    for entry in pic:
        # Fix: the fields are space-separated; split(' ') instead of the
        # invalid split('') the pasted code ended up with.
        fields = entry.split(' ')
        image_path = fields[0]
        stem = image_path.split('.')[0].split('/')[-1]

        # Ground-truth boxes straight from the annotation line.
        with open('Object-Detection-Metrics-master/groundtruths/2007_' + stem + '.txt', 'w') as gt_file:
            for field in fields[1:]:
                x1, y1, x2, y2, cls_id = field.split(',')
                gt_file.write(class_names[int(cls_id)] + ' ' + x1 + ' ' + y1 + ' ' + x2 + ' ' + y2 + '\n')

        image = Image.open(image_path)
        r_image, output = yolo.detect_image(image)

        # Detections from the model; use a loop variable distinct from the
        # outer one (the original shadowed `i`).
        out_boxes, out_scores, out_classes = output
        with open('Object-Detection-Metrics-master/detections/2007_' + stem + '.txt', 'w') as det_file:
            for idx, c in reversed(list(enumerate(out_classes))):
                top, left, bottom, right = out_boxes[idx]
                det_file.write('%s %.5f %d %d %d %d\n' % (
                    class_names[c], out_scores[idx], left, top, right, bottom))


if __name__ == '__main__':
    # Script entry point: evaluate the trained model on the test list.
    main()