SSD模型實現工件裂紋檢測
阿新 • • 發佈:2018-12-18
一、介紹
本博文主要介紹如何通過SSD物體檢測演算法實現工件裂紋檢測。裂紋影象如下所示:
二、關於SSD演算法
三、訓練資料的製作
這裡使用的是VOC2007的資料格式,資料夾下面一共有三個子資料夾:Annotations、JPEGImages和ImageSets。 其中,Annotations資料夾存放的是使用LabelImg標註資料時生成的xml檔案。
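JPEGImages存放的是原影象,為.jpg格式。 ImageSets下面有一個Main資料夾,Main資料夾下面主要是四個txt檔案:train.txt、val.txt、trainval.txt和test.txt,分別對應訓練集、驗證集、訓練驗證集和測試集。這四個txt檔案的內容,是從Annotations資料夾中隨機選取的影象名稱(不含副檔名),並按照一定的比例劃分(下面的程式碼中,trainval佔全部資料的90%,train又佔trainval的90%,其餘分別歸入test和val)。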
實現原始碼如下:
import os
import random

# Fraction of all annotated images that goes into trainval (the rest is test).
trainval_percent = 0.9
# Fraction of trainval that goes into train (the rest is val).
train_percent = 0.9
xmlfilepath = 'F:/competition code/ssd_keras-master/ssd_keras-master/data/liewen_two_class/Annotations'
txtsavepath = 'F:/competition code/ssd_keras-master/ssd_keras-master/data/liewen_two_class/ImageSets/Main'


def split_dataset(xml_dir, txt_dir, trainval_percent=0.9, train_percent=0.9, seed=None):
    """Randomly split the annotated images into VOC-style image-set files.

    Writes ``trainval.txt``, ``train.txt``, ``val.txt`` and ``test.txt`` into
    ``txt_dir``. Each file holds one image name (annotation file name without
    its extension) per line.

    Args:
        xml_dir: Directory containing the ``.xml`` annotation files.
        txt_dir: Existing directory in which the four txt files are written.
        trainval_percent: Fraction of all images assigned to trainval; the
            remainder is written to ``test.txt``.
        train_percent: Fraction of trainval assigned to train; the remainder
            is written to ``val.txt``.
        seed: Optional seed for a reproducible split. ``None`` gives a
            different split on every run.
    """
    rng = random.Random(seed)

    # Only annotation files take part in the split. Stray files in the
    # directory would otherwise end up in the lists with a mangled name.
    # Sorting makes the result independent of the order os.listdir returns.
    names = sorted(
        os.path.splitext(entry)[0]
        for entry in os.listdir(xml_dir)
        if entry.lower().endswith('.xml')
    )

    num = len(names)
    n_trainval = int(num * trainval_percent)
    n_train = int(n_trainval * train_percent)

    trainval_indices = rng.sample(range(num), n_trainval)
    # Sets give O(1) membership tests inside the loop below.
    train = set(rng.sample(trainval_indices, n_train))
    trainval = set(trainval_indices)

    # The context manager closes all four files even if a write fails.
    with open(os.path.join(txt_dir, 'trainval.txt'), 'w') as ftrainval, \
            open(os.path.join(txt_dir, 'test.txt'), 'w') as ftest, \
            open(os.path.join(txt_dir, 'train.txt'), 'w') as ftrain, \
            open(os.path.join(txt_dir, 'val.txt'), 'w') as fval:
        for index, name in enumerate(names):
            line = name + '\n'
            if index in trainval:
                ftrainval.write(line)
                if index in train:
                    ftrain.write(line)
                else:
                    fval.write(line)
            else:
                ftest.write(line)


if __name__ == '__main__':
    split_dataset(xmlfilepath, txtsavepath, trainval_percent, train_percent)
四、訓練資料
訓練模型的指令碼為train_ssd300.py,顧名思義,模型的輸入影象尺寸是300x300。不過不用擔心,程式碼內部已經實現了尺寸轉換,可以輸入任意尺寸的影象。原始碼如下:
# train_ssd300.py
# Trains an SSD300 model with a single foreground class on a Pascal-VOC-style
# dataset, using the model, loss, encoder and data-generator modules imported
# below.

from keras.optimizers import Adam, SGD
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TerminateOnNaN, CSVLogger
from keras import backend as K
from keras.models import load_model
from math import ceil
import numpy as np
from matplotlib import pyplot as plt

from models.keras_ssd300 import ssd_300
from keras_loss_function.keras_ssd_loss import SSDLoss
from keras_layers.keras_layer_AnchorBoxes import AnchorBoxes
from keras_layers.keras_layer_DecodeDetections import DecodeDetections
from keras_layers.keras_layer_DecodeDetectionsFast import DecodeDetectionsFast
from keras_layers.keras_layer_L2Normalization import L2Normalization

from ssd_encoder_decoder.ssd_input_encoder import SSDInputEncoder
from ssd_encoder_decoder.ssd_output_decoder import decode_detections, decode_detections_fast

from data_generator.object_detection_2d_data_generator import DataGenerator
from data_generator.object_detection_2d_geometric_ops import Resize
from data_generator.object_detection_2d_photometric_ops import ConvertTo3Channels
from data_generator.data_augmentation_chain_original_ssd import SSDDataAugmentation
from data_generator.object_detection_2d_misc_utils import apply_inverse_transforms
import tensorflow as tf
from keras import backend as K
from focal_loss import focal_loss

# ---------------------------------------------------------------------------
# Model configuration
# ---------------------------------------------------------------------------

img_height = 300    # Height of the model input images
img_width = 300     # Width of the model input images
img_channels = 3    # Number of color channels of the model input images

# Per-channel mean of the images in the dataset. Do not change this value if
# you are using any of the pre-trained weights.
mean_color = [123, 117, 104]

# The color channel order in the original SSD is BGR, so the model reverses
# the color channel order of the input images.
swap_channels = [2, 1, 0]

# Number of classes, not counting the background class.
n_classes = 1

# Anchor box scaling factors used in the original SSD300 for Pascal VOC.
# Original author's note (translated): sampling happens on six scale levels
# and the last value (1.05) is probably unused; each number is the anchor box
# length as a multiple (0.1, 0.2, 0.37, ...) of the feature map length, and
# the matching aspect ratios live in `aspect_ratios`, so the number and shape
# of anchors differ per scale level.
# NOTE(review): that description of the 7th scale and of the reference length
# should be checked against the ssd_keras anchor box documentation.
scales_pascal = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05]

# Anchor box scaling factors used in the original SSD300 for MS COCO.
scales_coco = [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]

scales = scales_pascal

# Anchor box aspect ratios used in the original SSD300; the order matters.
# Each predictor layer gets its own list object.
_ratios_small = [1.0, 2.0, 0.5]
_ratios_large = [1.0, 2.0, 0.5, 3.0, 1.0/3.0]
aspect_ratios = [list(_ratios_small),
                 list(_ratios_large),
                 list(_ratios_large),
                 list(_ratios_large),
                 list(_ratios_small),
                 list(_ratios_small)]

two_boxes_for_ar1 = True

# Space between two adjacent anchor box center points for each predictor layer.
steps = [8, 16, 32, 64, 100, 300]

# Offsets of the first anchor box center points from the top and left borders
# of the image, as a fraction of the step size, for each predictor layer.
offsets = [0.5] * 6

# Whether or not to clip the anchor boxes to lie entirely within the image.
clip_boxes = False

# Variances by which the encoded target coordinates are divided, as in the
# original implementation.
variances = [0.1, 0.1, 0.2, 0.2]

normalize_coords = True

# The model and the label encoder must be built with the same anchor box
# settings, so they are collected once and passed to both.
anchor_settings = dict(scales=scales,
                       aspect_ratios_per_layer=aspect_ratios,
                       two_boxes_for_ar1=two_boxes_for_ar1,
                       steps=steps,
                       offsets=offsets,
                       clip_boxes=clip_boxes,
                       variances=variances,
                       normalize_coords=normalize_coords)

# ---------------------------------------------------------------------------
# Build a new model (the alternative would be loading a saved one)
# ---------------------------------------------------------------------------

# 1: Build the Keras model.
K.clear_session()  # Clear previous models from memory.

model = ssd_300(image_size=(img_height, img_width, img_channels),
                n_classes=n_classes,
                mode='training',
                l2_regularization=0.0005,
                subtract_mean=mean_color,
                swap_channels=swap_channels,
                **anchor_settings)

# 2: Load some weights into the model.
# TODO: Set the path to the weights you want to load.
weights_path = 'VGG_ILSVRC_16_layers_fc_reduced.h5'
model.load_weights(weights_path, by_name=True)
model.summary()

# 3: Instantiate an optimizer and the SSD loss function and compile the model.
# To follow the original Caffe implementation use the SGD optimizer below;
# the alternative suggested by the upstream comments is
#   Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
sgd = SGD(lr=0.0001, momentum=0.9, decay=0.001, nesterov=False)
ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
model.compile(optimizer=sgd, loss=ssd_loss.compute_loss, metrics=['accuracy'])
# Model setup ends here.
# Author's note (translated): exploding gradients were observed at this point.

# ---------------------------------------------------------------------------
# Load the data
# ---------------------------------------------------------------------------

# 1: Instantiate two `DataGenerator` objects: one for training, one for
# validation. Optional: if you have enough memory, consider loading the images
# into memory.
train_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)
val_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)

# 2: Parse the image and label lists for the training and validation datasets.
# This can take a while.
# TODO: Set the paths to the datasets here.
data_root = 'F:/competition code/ssd_keras-master/ssd_keras-master/data/liewen_expand/'

# The directory that contains the images.
VOC_2007_images_dir = data_root + 'JPEGImages/'
# The directory that contains the annotations.
VOC_2007_annotations_dir = data_root + 'Annotations/'

# The paths to the image sets.
VOC_2007_train_image_set_filename = data_root + 'ImageSets/Main/train.txt'
VOC_2007_val_image_set_filename = data_root + 'ImageSets/Main/val.txt'
VOC_2007_trainval_image_set_filename = data_root + 'ImageSets/Main/trainval.txt'
VOC_2007_test_image_set_filename = data_root + 'ImageSets/Main/test.txt'

# The XML parser needs to know what object class names to look for and in
# which order to map them to integers. 'background' must be listed here.
classes = ['background', 'neg']

# The training generator reads the trainval image set.
train_dataset.parse_xml(images_dirs=[VOC_2007_images_dir],
                        image_set_filenames=[VOC_2007_trainval_image_set_filename],
                        annotations_dirs=[VOC_2007_annotations_dir],
                        classes=classes,
                        include_classes='all',
                        exclude_truncated=False,
                        exclude_difficult=False,
                        ret=False)

# The validation generator reads the test image set.
val_dataset.parse_xml(images_dirs=[VOC_2007_images_dir],
                      image_set_filenames=[VOC_2007_test_image_set_filename],
                      annotations_dirs=[VOC_2007_annotations_dir],
                      classes=classes,
                      include_classes='all',
                      exclude_truncated=False,
                      exclude_difficult=True,
                      ret=False)

# Optional: Convert the dataset into an HDF5 dataset. This will require more
# disk space, but will speed up the training. Doing this is not relevant in
# case you activated the `load_images_into_memory` option in the constructor,
# because in that case the images are in memory already anyway. If you don't
# want to create HDF5 datasets, comment out the subsequent two function calls.
train_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07+12_trainval.h5',
                                  resize=False,
                                  variable_image_size=True,
                                  verbose=True)

val_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07_test.h5',
                                resize=False,
                                variable_image_size=True,
                                verbose=True)

# 3: Set the batch size.
# Change the batch size if you like, or if you run into GPU memory issues.
batch_size = 8

# 4: Set the image transformations for pre-processing and data augmentation.

# For the training generator:
ssd_data_augmentation = SSDDataAugmentation(img_height=img_height,
                                            img_width=img_width,
                                            background=mean_color)

# For the validation generator:
convert_to_3_channels = ConvertTo3Channels()
resize = Resize(height=img_height, width=img_width)

# 5: Instantiate an encoder that can encode ground truth labels into the
# format needed by the SSD loss function. The encoder constructor needs the
# spatial dimensions of the model's predictor layers to create the anchor
# boxes.
predictor_layer_names = ['conv4_3_norm_mbox_conf',
                         'fc7_mbox_conf',
                         'conv6_2_mbox_conf',
                         'conv7_2_mbox_conf',
                         'conv8_2_mbox_conf',
                         'conv9_2_mbox_conf']
predictor_sizes = [model.get_layer(layer_name).output_shape[1:3]
                   for layer_name in predictor_layer_names]

ssd_input_encoder = SSDInputEncoder(img_height=img_height,
                                    img_width=img_width,
                                    n_classes=n_classes,
                                    predictor_sizes=predictor_sizes,
                                    matching_type='multi',
                                    pos_iou_threshold=0.5,
                                    neg_iou_limit=0.5,
                                    **anchor_settings)

# 6: Create the generator handles that will be passed to Keras'
# `fit_generator()` function.
train_generator = train_dataset.generate(batch_size=batch_size,
                                         shuffle=True,
                                         transformations=[ssd_data_augmentation],
                                         label_encoder=ssd_input_encoder,
                                         returns={'processed_images',
                                                  'encoded_labels'},
                                         keep_images_without_gt=False)

val_generator = val_dataset.generate(batch_size=batch_size,
                                     shuffle=False,
                                     transformations=[convert_to_3_channels,
                                                      resize],
                                     label_encoder=ssd_input_encoder,
                                     returns={'processed_images',
                                              'encoded_labels'},
                                     keep_images_without_gt=False)

# Get the number of samples in the training and validation datasets.
train_dataset_size = train_dataset.get_dataset_size()
val_dataset_size = val_dataset.get_dataset_size()

print("Number of images in the training dataset:\t{:>6}".format(train_dataset_size))
print("Number of images in the validation dataset:\t{:>6}".format(val_dataset_size))
print("cuiwei")


def lr_schedule(epoch):
    """Return the learning rate for `epoch` (used by LearningRateScheduler).

    The rate is 0.0001 for every epoch below 100 and 0.00001 from epoch 100
    onwards.
    """
    return 0.0001 if epoch < 100 else 0.00001


# Define model callbacks.
# TODO: Set the filepath under which you want to save the model.
model_checkpoint = ModelCheckpoint(filepath='ssd300_model_liehen_expand.h5',  # name of the saved model
                                   monitor='val_loss',
                                   verbose=1,
                                   save_best_only=True,
                                   save_weights_only=False,
                                   mode='auto',
                                   period=1)

csv_logger = CSVLogger(filename='ssd300_pascal_07+12_training_log.csv',
                       separator=',',
                       append=True)

learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule,
                                                verbose=1)

terminate_on_nan = TerminateOnNaN()

callbacks = [model_checkpoint,
             csv_logger,
             learning_rate_scheduler,
             terminate_on_nan]

# If you're resuming a previous training, set `initial_epoch` and
# `final_epoch` accordingly.
initial_epoch = 0
final_epoch = 20
steps_per_epoch = 80

history = model.fit_generator(generator=train_generator,
                              steps_per_epoch=steps_per_epoch,
                              epochs=final_epoch,
                              callbacks=callbacks,
                              validation_data=val_generator,
                              validation_steps=ceil(val_dataset_size/batch_size),
                              initial_epoch=initial_epoch)
五、測試資料
訓練完成後,使用test_ssd300.py對模型進行測試,原始碼如下:
# test_ssd300.py
# Builds the SSD300 model in inference mode, loads trained weights, runs the
# model on a single image and draws the predicted boxes with both matplotlib
# and OpenCV.

from keras import backend as K
from keras.models import load_model
from keras.preprocessing import image
from keras.optimizers import Adam
from imageio import imread
import numpy as np
from matplotlib import pyplot as plt

from models.keras_ssd300 import ssd_300
from keras_loss_function.keras_ssd_loss import SSDLoss
from keras_layers.keras_layer_AnchorBoxes import AnchorBoxes
from keras_layers.keras_layer_DecodeDetections import DecodeDetections
from keras_layers.keras_layer_DecodeDetectionsFast import DecodeDetectionsFast
from keras_layers.keras_layer_L2Normalization import L2Normalization

from ssd_encoder_decoder.ssd_output_decoder import decode_detections, decode_detections_fast

from data_generator.object_detection_2d_data_generator import DataGenerator
from data_generator.object_detection_2d_photometric_ops import ConvertTo3Channels
from data_generator.object_detection_2d_geometric_ops import Resize
from data_generator.object_detection_2d_misc_utils import apply_inverse_transforms
import cv2

# Set the image size.
img_height = 300
img_width = 300

# Alternative (disabled): load a complete saved model instead of building one.
# # TODO: Set the path to the `.h5` file of the model to be loaded.
# model_path = 'VGG_VOC0712Plus_SSD_300x300_iter_240000.h5'
# # We need to create an SSDLoss object in order to pass that to the model loader.
# ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)
# K.clear_session()  # Clear previous models from memory.
# model = load_model(model_path, custom_objects={'AnchorBoxes': AnchorBoxes,
#                                                'L2Normalization': L2Normalization,
#                                                'DecodeDetections': DecodeDetections,
#                                                'compute_loss': ssd_loss.compute_loss})

# 1: Build the Keras model.
K.clear_session()  # Clear previous models from memory.

model = ssd_300(image_size=(img_height, img_width, 3),
                n_classes=1,
                mode='inference',
                l2_regularization=0.0005,
                # The scales for MS COCO are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]
                scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05],
                aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                         [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                                         [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                                         [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                                         [1.0, 2.0, 0.5],
                                         [1.0, 2.0, 0.5]],
                two_boxes_for_ar1=True,
                steps=[8, 16, 32, 64, 100, 300],
                offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
                clip_boxes=False,
                variances=[0.1, 0.1, 0.2, 0.2],
                normalize_coords=True,
                subtract_mean=[123, 117, 104],
                swap_channels=[2, 1, 0],
                confidence_thresh=0.5,
                iou_threshold=0.45,
                top_k=200,
                nms_max_output_size=400)

# 2: Load the trained weights into the model.
# TODO: Set the path of the trained weights. Other weight files mentioned by
# the author: 'VGG_VOC0712Plus_SSD_300x300_iter_240000.h5',
# 'ssd300_model_liehen_small.h5', 'ssd300_model_liehen_expand.h5'.
weights_path = 'ssd300_model_liehen.h5'
model.load_weights(weights_path, by_name=True)

# 3: Compile the model so that Keras won't complain the next time you load it.
adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
model.compile(optimizer=adam, loss=ssd_loss.compute_loss)
model.summary()

orig_images = []   # Store the images here.
input_images = []  # Store resized versions of the images here.

# We'll only load one image in this example.
img_path = 'F:/Data/crack image/ChallengeDataset/ChallengeDataset/train/neg/428.jpg'

# cv2.imread does not raise when it cannot read a file; it returns None.
# Fail here with a clear message instead of crashing later in cv2.putText.
image_opencv = cv2.imread(img_path)
if image_opencv is None:
    raise IOError("cv2.imread could not read image: {}".format(img_path))

orig_images.append(imread(img_path))
img = image.load_img(img_path, target_size=(img_height, img_width))
img = image.img_to_array(img)
input_images.append(img)
input_images = np.array(input_images)

# Run the prediction on the new image.
y_pred = model.predict(input_images)

# Defined explicitly so the filter below cannot hit a NameError.
# The model is built above with confidence_thresh=0.5, so a threshold of 0
# here only drops rows whose confidence is not positive.
# NOTE(review): presumably those rows are the decoder's zero padding up to
# top_k — confirm against the DecodeDetections layer.
confidence_threshold = 0

y_pred_thresh = [y_pred[k][y_pred[k, :, 1] > confidence_threshold]
                 for k in range(y_pred.shape[0])]

np.set_printoptions(precision=2, suppress=True, linewidth=90)
print("Predicted boxes:\n")
print(' class conf xmin ymin xmax ymax')
print(y_pred_thresh[0])

# Display the image and draw the predicted boxes onto it.

# Set the colors for the bounding boxes.
colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist()
classes = ['background', 'neg']

plt.figure(figsize=(20, 12))
plt.imshow(orig_images[0])

current_axis = plt.gca()

orig_height = orig_images[0].shape[0]
orig_width = orig_images[0].shape[1]

for box in y_pred_thresh[0]:
    # Transform the predicted bounding boxes for the 300x300 image to the
    # original image dimensions.
    xmin = box[2] * orig_width / img_width
    ymin = box[3] * orig_height / img_height
    xmax = box[4] * orig_width / img_width
    ymax = box[5] * orig_height / img_height

    color = colors[int(box[0])]
    label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])

    # matplotlib overlay
    current_axis.add_patch(plt.Rectangle((xmin, ymin), xmax-xmin, ymax-ymin,
                                         color=color, fill=False, linewidth=2))
    current_axis.text(xmin, ymin, label, size='x-large', color='white',
                      bbox={'facecolor': color, 'alpha': 1.0})

    # OpenCV overlay on the image read with cv2.imread
    cv2.putText(image_opencv, label, (int(xmin), int(ymin)-10),
                cv2.FONT_HERSHEY_COMPLEX, 0.8, (255, 255, 0), 1)
    cv2.rectangle(image_opencv, (int(xmin), int(ymin)), (int(xmax), int(ymax)),
                  (0, 255, 0), 2)

# Show the annotated OpenCV image and wait for a key press.
cv2.namedWindow("Canvas", 0)
cv2.imshow("Canvas", image_opencv)
cv2.waitKey(0)
測試結果: