1. 程式人生 > 實用技巧 >深度學習下的影象資料增強

深度學習下的影象資料增強

在深度學習領域,對於資料量的要求是巨大的。在CV領域,我們通過影象資料增強對現有影象資料進行處理來豐富影象訓練集,這樣可以有效地提升模型的泛化能力,緩解過擬合的問題。

常用的影象資料增強方式有旋轉影象、裁剪影象、水平或垂直翻轉影象,改變影象亮度等,為了方便訓練模型,我們通常會對資料進行歸一化或者標準化以及將圖片大小設定為一樣。

下面我們分別通過opencv庫、pytorch內建函式、tensorflow2內建函式來實現以上功能。

1.opencv

1.1利用opencv讀取影象

cv2.imread("圖片地址", 模式引數):模式引數包括 cv2.IMREAD_COLOR(1,預設)、cv2.IMREAD_GRAYSCALE(0)、cv2.IMREAD_UNCHANGED(-1)。

返回結果為numpy格式的陣列。

# Read the image with mode 1 (cv2.IMREAD_COLOR): the array has a channel axis.
img_path = os.path.join(current_dir, image_dir)
img = cv2.imread(img_path, 1)
print(img.shape)
>>>(500, 375, 3)


# Read the same path with mode 0 (cv2.IMREAD_GRAYSCALE): the array is 2-D.
img_path = os.path.join(current_dir, image_dir)
img = cv2.imread(img_path, 0)
print(img.shape)
>>>(500, 375)

對影象進行顯示

1.2影象歸一化

class Normalize(object):
    """Scale an image and standardize it with a fixed mean and std.

    Args:
        mean_val: Per-channel (or scalar) mean subtracted from the image.
        std_val: Per-channel (or scalar) standard deviation the image is
            divided by.
        val_scale: Leave at 1 when ``mean_val``/``std_val`` are expressed in
            the (0, 1) range; pixel values are then multiplied by 1/255
            first. Pass any other value when mean/std are expressed in the
            (0, 255) range; pixel values are then left unscaled.
    """

    def __init__(self, mean_val, std_val, val_scale=1):
        self.mean = np.array(mean_val, dtype=np.float32)
        self.std = np.array(std_val, dtype=np.float32)
        self.val_scale = 1 / 255.0 if val_scale == 1 else 1

    def __call__(self, image, label=None):
        """Return ``(normalized_image, label)``; ``label`` is passed through."""
        # Cast to 32-bit float so the result can later be turned into a
        # tensor and fed to the network.
        image = image.astype(np.float32)
        # Bring pixel values into the same range as mean/std.
        image = image * self.val_scale
        # Standardize: subtract the mean, then divide by the std.
        image = image - self.mean
        image = image * (1 / self.std)
        return image, label

1.3改變影象大小(resize),一般通過插值來改變影象大小,常用的插值方法有線性插值、雙線性插值、三次插值、最近鄰插值

class Resize(object):
    """Resize an image (and optionally its label map) to a fixed size.

    Args:
        size: An int for a square target, or a tuple that is handed to
            ``cv2.resize`` unchanged as ``dsize``.
    """

    def __init__(self, size):
        assert isinstance(size, (int, tuple)), "CHECK SIZE TYPE!"
        if isinstance(size, int):
            self.size = (size, size)
        else:
            self.size = size

    def __call__(self, image, label=None):
        """Return ``(resized_image, resized_label)``.

        The image is resized with bilinear interpolation. The label, when
        given, uses nearest-neighbour interpolation so that no new label
        values are created by blending.
        """
        image = cv2.resize(image, dsize=self.size,
                           interpolation=cv2.INTER_LINEAR)
        if label is not None:
            label = cv2.resize(label, dsize=self.size,
                               interpolation=cv2.INTER_NEAREST)
        return image, label
# An int size is expanded to a square (600, 600) target.
resize = Resize(600)
# Calling the transform returns an (image, label) tuple; label is None here.
r_img = resize(img)

# Index 0 of the returned tuple is the resized image.
r_img[0].shape
>>>(600,600,3)

1.4 隨機裁剪(crop)

class RandomCrop(object):
    """Cut a randomly positioned window out of an image (and its label).

    Args:
        size: An int for a square window, or a ``(height, width)`` tuple.
    """

    def __init__(self, size):
        assert isinstance(size, (int, tuple)), "CHECK SIZE TYPE!"
        if isinstance(size, int):
            self.size = (size, size)
        else:
            self.size = size

    def __call__(self, image, label=None):
        """Return ``(cropped_image, cropped_label)``.

        Works for both 2-D (grayscale) and 3-D (H x W x C) arrays. When the
        requested window does not fit inside the image, a message is printed
        and the inputs are returned unchanged.
        """
        h, w = image.shape[:2]
        crop_h, crop_w = self.size[0], self.size[1]

        # Check the one expected failure explicitly instead of catching every
        # exception, so unrelated errors are no longer hidden.
        if crop_h > h or crop_w > w:
            print('CROP OUT OF IMAGE, RETURN ORIGIN IMAGE!')
            return image, label

        h_start = np.random.randint(0, h - crop_h + 1)
        w_start = np.random.randint(0, w - crop_w + 1)
        h_end, w_end = h_start + crop_h, w_start + crop_w

        # Slicing only the first two axes keeps any trailing channel axis
        # intact and also works when there is none.
        image = image[h_start:h_end, w_start:w_end]
        if label is not None:
            label = label[h_start:h_end, w_start:w_end]
        return image, label

1.5 水平或豎直翻轉

class RandomHoriFlip():
    """Flip an image (and its label) horizontally with a given probability.

    Args:
        prob: Probability of flipping; the flip happens when a uniform draw
            from ``np.random.rand()`` is below this value.
    """

    def __init__(self, prob=0.5):
        self.prob = prob

    def __call__(self, image, label=None):
        """Return ``(image, label)``, both flipped or both untouched."""
        rd = np.random.rand()
        if rd < self.prob:
            # Reverse the column axis. Indexing only the first two axes keeps
            # this valid for 2-D grayscale arrays as well as H x W x C ones.
            # (For a vertical flip, reverse the row axis instead: [::-1].)
            image = image[:, ::-1]
            if label is not None:
                label = label[:, ::-1]
        return image, label

1.6 旋轉影象

class RandomRotate():
    """Rotate an image (and its label) about the image centre.

    Despite the name, the angle is not sampled: every call rotates by exactly
    ``degree``.

    Args:
        degree: Rotation angle in degrees. It is negated before being passed
            to ``cv2.getRotationMatrix2D``.
    """

    def __init__(self, degree):
        self.degree = degree

    def __call__(self, image, label=None):
        """Return ``(rotated_image, rotated_label)`` at the input size."""
        # Only the first two axes are needed, so grayscale (2-D) inputs work.
        h, w = image.shape[:2]
        center = (w // 2, h // 2)

        M = cv2.getRotationMatrix2D(center, -self.degree, 1.)
        image = cv2.warpAffine(image, M, (w, h))
        if label is not None:
            # Nearest-neighbour keeps label values intact; the default
            # bilinear interpolation would blend neighbouring class ids.
            label = cv2.warpAffine(label, M, (w, h), flags=cv2.INTER_NEAREST)

        return image, label
# Build a 45-degree rotation and apply it; the result is an (image, label) tuple.
rr = RandomRotate(45)
rr_img = rr(img)

# Show the rotated image (tuple element 0) in a named window.
cv2.namedWindow("imageShowTest")
cv2.imshow("imageShowTest", rr_img[0])

# Wait for a key press, then close all OpenCV windows.
cv2.waitKey(0)
cv2.destroyAllWindows()

為了對一張圖片依次套用上述一系列變換,通常需要用Compose進行封裝

class Compose():
    """Chain several ``(image, label) -> (image, label)`` transforms.

    Args:
        transforms: Iterable of callables, applied in the order given.
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, label=None):
        """Run every transform in turn and return the final ``(image, label)``."""
        for step in self.transforms:
            outcome = step(image, label)
            # Each step must hand back exactly an (image, label) pair.
            image, label = outcome
        return image, label
# Resize to 600x600 first, then rotate by 45 degrees.
compose = Compose([Resize(600), RandomRotate(45)])
# Returns an (image, label) tuple; label stays None because none was given.
c_img = compose(img)

可以把所有要實施的變換引數都封裝到一個類中(影象資料增強類)

class TrainAugumentation():
    """Bundle the training-time pipeline: resize, rotate, then normalize.

    Args:
        image_size: Target size forwarded to ``Resize``.
        rotation_degree: Angle forwarded to ``RandomRotate``.
        mean_val: Mean forwarded to ``Normalize``.
        std_val: Standard deviation forwarded to ``Normalize``.
    """

    def __init__(self, image_size, rotation_degree, mean_val, std_val):
        self.image_size = image_size
        self.rotation_degree = rotation_degree
        self.mean_val = mean_val
        self.std_val = std_val
        pipeline = [
            Resize(image_size),
            RandomRotate(rotation_degree),
            Normalize(mean_val, std_val),
        ]
        self.transforms = Compose(pipeline)

    def __call__(self, image, label=None):
        """Shorthand for :meth:`augment`."""
        return self.augment(image, label)

    def augment(self, image, label):
        """Apply the composed pipeline and return ``(image, label)``."""
        augmented_image, augmented_label = self.transforms(image, label)
        return augmented_image, augmented_label

2.pytorch

pytorch在torchvision.transforms集成了各種影象變換函式,如:

Compose() #封裝各種影象變換

Normalize()

Resize()

RandomHorizontalFlip()

RandomCrop()

ToTensor()

注意ToTensor() 是將圖片畫素值轉化成[0-1],然後轉化為Tensor

pytorch預設對PIL開啟的影象物件進行變換

  def __getitem__(self, index):
    """Load one sample and return ``(img, target, camid)``.

    Args:
        index: Position of the sample in ``self.names`` / ``self.labels`` /
            ``self.cams``. The original note says it is incremented
            automatically, presumably by the data loader -- confirm.

    Returns:
        The image opened with PIL and converted to RGB (passed through
        ``self.train_data_transform`` when one is set), the label cast to
        ``int``, and the camera id stored for that sample.
    """
    img_path = os.path.join(self.data_dir, self.names[index])
    img = Image.open(img_path).convert('RGB')
    target = int(self.labels[index])
    camid = self.cams[index]

    # Identity check: ``!= None`` would invoke any custom ``__ne__`` defined
    # on the transform object.
    if self.train_data_transform is not None:
      img = self.train_data_transform(img)

    return img, target, camid

3.tensorflow

tensorflow2 在tensorflow.keras.preprocessing.image 封裝了各種影象處理的API,在模型訓練時都會使用圖片生成器ImageDataGenerator。

該函式API會在模型訓練時無限生成資料,生成資料的方式會根據引數設定實時進行資料增強。

# Reference listing: the ImageDataGenerator constructor with every argument
# shown at the value given in this listing.
keras.preprocessing.image.ImageDataGenerator(featurewise_center=False,  
                                             samplewise_center=False, 
                                             featurewise_std_normalization=False, 
                                             samplewise_std_normalization=False, 
                                             zca_whitening=False, 
                                             zca_epsilon=1e-06, 
                                             rotation_range=0, 
                                             width_shift_range=0.0, 
                                             height_shift_range=0.0, 
                                             brightness_range=None, 
                                             shear_range=0.0, 
                                             zoom_range=0.0, 
                                             channel_shift_range=0.0, 
                                             fill_mode='nearest', 
                                             cval=0.0, 
                                             horizontal_flip=False, 
                                             vertical_flip=False, 
                                             rescale=None, 
                                             preprocessing_function=None, 
                                             data_format=None, 
                                             validation_split=0.0, 
                                             dtype=None)

對部分引數解釋如下:

featurewise_center: 將輸入資料的均值設定為0,逐特徵(按通道)進行

samplewise_center:將每個樣本的均值設定為0

featurewise_std_normalization:將輸入資料除以標準差,逐特徵(通道)進行

samplewise_std_normalization:每個輸入除以其標準差

zca_epsilon:zca白化的epsilon值

zca_whitening:是否應用zca白化

rotation_range:影象旋轉

width_shift_range:影象水平偏移

height_shift_range:影象垂直偏移

shear_range:影象剪下強度(矩形轉化為平行四邊形)

zoom_range:影象放大

channel_shift_range:通道偏移

horizontal_flip:水平翻轉

vertical_flip:垂直翻轉

rescale:縮放因子,資料會乘以該值(例如設為1/255,可將影象從0-255轉化為0-1)

在訓練模型時採用flow()方法或者flow_from_directory()將資料封裝為batch

from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Training generator: rotation, width/height shifts, shear, zoom and
# horizontal flips are enabled; fill_mode is set to 'nearest'.
train_datagen = ImageDataGenerator(rotation_range=45,
                  width_shift_range=0.2,
                  height_shift_range=0.2,
                  shear_range=0.2,
                  zoom_range=0.25,
                  horizontal_flip=True,
                  fill_mode='nearest')
# Validation generator: constructed without any augmentation arguments.
test_datagen=ImageDataGenerator()

# Both generators are fed to model.fit through flow() with BATCH_SIZE batches;
# the step counts are derived from train_idx / valid_idx (defined elsewhere).
hist = model.fit(train_datagen.flow(Xtr, ytr, batch_size=BATCH_SIZE),steps_per_epoch=train_idx.sum()//BATCH_SIZE,
                           epochs=EPOCHS,
                           validation_data=test_datagen.flow(Xv, yv, batch_size=BATCH_SIZE),
                           validation_steps=valid_idx.sum()//BATCH_SIZE, verbose=2)

在呼叫flow()方法時,會生成一個生成器,按batch_size的大小來輸出圖片資料,輸出時會自動呼叫random_transform 和standardize方法進行資料變換。

    # Output buffer: batch dimension first, then the per-sample shape of self.x.
    batch_x = np.zeros(tuple([current_batch_size] + list(self.x.shape)[1:]), dtype=K.floatx())
    # i is the slot in the batch, j the index of the source sample in self.x.
    for i, j in enumerate(index_array):
      x = self.x[j]
      # Augment the sample first, then standardize it, before storing it.
      x = self.image_data_generator.random_transform(x.astype(K.floatx()))
      x = self.image_data_generator.standardize(x)
      batch_x[i] = x

著重理解一下random_transform方法

  def random_transform(self, x, seed=None):
    """Randomly augment a single image tensor.

    Rotation, shift, shear and zoom are each drawn from the ranges configured
    on ``self`` and combined into one 3x3 matrix that is applied in a single
    pass. Channel shift and horizontal/vertical flips follow.

    # Arguments
        x: 3D tensor, single image.
        seed: random seed; when given, ``np.random.seed(seed)`` is called
            first.
    # Returns
        A randomly transformed version of the input (same shape).
    """
    # x is a single image, so it doesn't have image number at index 0
    img_row_axis = self.row_axis - 1
    img_col_axis = self.col_axis - 1
    img_channel_axis = self.channel_axis - 1

    if seed is not None:
      np.random.seed(seed)

    # use composition of homographies
    # to generate final transform that needs to be applied
    if self.rotation_range:
      # Rotation angle in radians, drawn from +/- rotation_range degrees.
      theta = np.pi / 180 * np.random.uniform(-self.rotation_range, self.rotation_range)
    else:
      theta = 0

    if self.height_shift_range:
      # Shift along the row axis, as a fraction of the image height.
      tx = np.random.uniform(-self.height_shift_range, self.height_shift_range) * x.shape[img_row_axis]
    else:
      tx = 0

    if self.width_shift_range:
      # Shift along the column axis, as a fraction of the image width.
      ty = np.random.uniform(-self.width_shift_range, self.width_shift_range) * x.shape[img_col_axis]
    else:
      ty = 0

    if self.shear_range:
      shear = np.random.uniform(-self.shear_range, self.shear_range)
    else:
      shear = 0

    if self.zoom_range[0] == 1 and self.zoom_range[1] == 1:
      zx, zy = 1, 1
    else:
      zx, zy = np.random.uniform(self.zoom_range[0], self.zoom_range[1], 2)

    transform_matrix = None
    if theta != 0:
      # Rotation matrix.
      rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0],
                                  [np.sin(theta), np.cos(theta), 0],
                                  [0, 0, 1]])
      transform_matrix = rotation_matrix

    if tx != 0 or ty != 0:
      # Translation matrix.
      shift_matrix = np.array([[1, 0, tx],
                               [0, 1, ty],
                               [0, 0, 1]])
      transform_matrix = shift_matrix if transform_matrix is None else np.dot(transform_matrix, shift_matrix)

    if shear != 0:
      # Shear matrix.
      shear_matrix = np.array([[1, -np.sin(shear), 0],
                               [0, np.cos(shear), 0],
                               [0, 0, 1]])
      transform_matrix = shear_matrix if transform_matrix is None else np.dot(transform_matrix, shear_matrix)

    if zx != 1 or zy != 1:
      # Zoom matrix.
      zoom_matrix = np.array([[zx, 0, 0],
                              [0, zy, 0],
                              [0, 0, 1]])
      transform_matrix = zoom_matrix if transform_matrix is None else np.dot(transform_matrix, zoom_matrix)

    if transform_matrix is not None:
      h, w = x.shape[img_row_axis], x.shape[img_col_axis]
      # Re-express the transform relative to the image centre.
      transform_matrix = transform_matrix_offset_center(transform_matrix, h, w)
      # Apply the combined transform to perform the augmentation.
      x = apply_transform(x, transform_matrix, img_channel_axis,
                          fill_mode=self.fill_mode, cval=self.cval)

    # Channel shift.
    if self.channel_shift_range != 0:
      x = random_channel_shift(x, self.channel_shift_range, img_channel_axis)

    # Horizontal flip, applied with probability 0.5 when enabled.
    if self.horizontal_flip:
      if np.random.random() < 0.5:
        x = flip_axis(x, img_col_axis)

    # Vertical flip, applied with probability 0.5 when enabled.
    if self.vertical_flip:
      if np.random.random() < 0.5:
        x = flip_axis(x, img_row_axis)

    return x
def transform_matrix_offset_center(matrix, x, y):
  """Wrap a 3x3 transform so that it acts about an offset point.

  The point is ``(x / 2 + 0.5, y / 2 + 0.5)``. The result is
  ``offset . matrix . reset``, where ``reset`` translates by minus that point
  and ``offset`` translates back.
  """
  centre_first = 0.5 + float(x) / 2
  centre_second = 0.5 + float(y) / 2
  to_centre = np.array([[1, 0, centre_first],
                        [0, 1, centre_second],
                        [0, 0, 1]])
  from_centre = np.array([[1, 0, -centre_first],
                          [0, 1, -centre_second],
                          [0, 0, 1]])
  shifted = np.dot(to_centre, matrix)
  return np.dot(shifted, from_centre)


def apply_transform(x,
                    transform_matrix,
                    channel_axis=0,
                    fill_mode='nearest',
                    cval=0.):
  """Apply the image transformation specified by a matrix.

  # Arguments
      x: single image array; every channel is transformed independently as a
          2D plane.
      transform_matrix: 3x3 matrix. Its upper-left 2x2 block is used as the
          affine matrix and the first two entries of its last column as the
          offset.
      channel_axis: index of the channel axis in ``x``; negative indices are
          accepted.
      fill_mode: forwarded as ``mode`` to ``affine_transform``.
      cval: forwarded as ``cval`` to ``affine_transform``.
  # Returns
      The transformed version of the input.
  """
  # Bring the channel axis to the front so the image can be iterated
  # channel by channel.
  x = np.moveaxis(x, channel_axis, 0)
  final_affine_matrix = transform_matrix[:2, :2]
  final_offset = transform_matrix[:2, 2]

  # ndi: scipy.ndimage. order=0 selects nearest-neighbour sampling.
  channel_images = [ndi.affine_transform(
    x_channel,
    final_affine_matrix,
    final_offset,
    order=0,
    mode=fill_mode,
    cval=cval) for x_channel in x]
  x = np.stack(channel_images, axis=0)
  # Put the channel axis back where it was.
  x = np.moveaxis(x, 0, channel_axis)
  return x

參考部落格:

https://blog.csdn.net/dugudaibo/article/details/87719078?utm_medium=distribute.pc_relevant.none-task-blog-BlogCommendFromMachineLearnPai2-2.channel_param&depth_1-utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromMachineLearnPai2-2.channel_param