keras——基於神經網路的風格遷移生成藝術字
阿新 • • 發佈:2018-12-31
Keras是一個高層神經網路API,由純Python編寫而成,後端至少需要TensorFlow、Theano、CNTK其中一種神經網路框架,這裡建議用TensorFlow。
Keras的特性:
1.簡易和快速的原型設計(keras具有高度模組化,極簡,和可擴充特性)
2.支援CNN和RNN,或二者的結合
3.無縫CPU和GPU切換
Keras的設計原則是
1、使用者友好:說白了就是傻瓜式開發,狗上狗也行。
2、模組性:模型可理解為一個層的序列或資料的運算圖,完全可配置的模組可以用最少的代價自由組合在一起。具體而言,網路層、損失函式、優化器、初始化策略、啟用函式、正則化方法都是獨立的模組,你可以使用它們來構建自己的模型。
3、易擴充套件性:新增新模組超級容易,只需要仿照現有的模組編寫新的類或函式即可。建立新模組的便利性使得Keras更適合於先進的研究工作。
4、與Python協作:Keras沒有單獨的模型配置檔案型別(作為對比,caffe有),模型由python程式碼描述,使其更緊湊和更易debug,並提供了擴充套件的便利性。
基於卷積神經網路的風格遷移Keras生成藝術字的demo(原作者餘唯民 & 雷芳涵,程式碼稍作修改,原版程式碼在文末git倉庫)
支援一張或兩張風格圖
測試程式碼:
第一次執行時需要下載Keras提供的VGG19預訓練權重,所以會比較慢。
迭代處理建議用GPU,CPU的話我的老古董迭代一次大概在一分鐘左右
from __future__ import print_function from keras.preprocessing.image import load_img, img_to_array import numpy as np from scipy.optimize import fmin_l_bfgs_b import time import argparse from scipy.misc import imsave from keras.applications import vgg19 from keras import backend as K import os from PIL import Image, ImageFont, ImageDraw, ImageOps, ImageEnhance, ImageFilter import random random.seed(0) def save_img(fname, image, image_enhance=False): # 影象可以增強 image = Image.fromarray(image) if image_enhance: # 亮度增強 enh_bri = ImageEnhance.Brightness(image) brightness = 1.2 image = enh_bri.enhance(brightness) # 色度增強 enh_col = ImageEnhance.Color(image) color = 1.2 image = enh_col.enhance(color) # 銳度增強 enh_sha = ImageEnhance.Sharpness(image) sharpness = 1.2 image = enh_sha.enhance(sharpness) imsave(fname, image) return def smooth(image): # 模糊圖片 w, h, c = image.shape smoothed_image = np.zeros([w - 2, h - 2,c]) smoothed_image += image[:w - 2, 2:h,:] smoothed_image += image[1:w-1, 2:,:] smoothed_image += image[2:, 2:h,:] smoothed_image += image[:w-2, 1:h-1,:] smoothed_image += image[1:w-1, 2:h,:] smoothed_image += image[2:, 1:h - 1,:] smoothed_image += image[:w-2, :h-2,:] smoothed_image += image[1:w-1, :h - 2,:] smoothed_image += image[2:, :h - 2,:] smoothed_image /= 9.0 return smoothed_image.astype("uint8") def str_to_tuple(s): s = list(s) ans = list() temp = "" for i in range(len(s)): if s[i] == '(' : continue if s[i] == ',' or s[i] == ')': ans.append(int(temp)) temp = "" continue temp += s[i] return tuple(ans) def char_to_picture(text="", font_name="宋體", background_color=(255,255,255), text_color=(0,0,0), pictrue_size=400, text_position=(0, 0), in_meddium=False, reverse_color=False,smooth_times=0,noise=0): pictrue_shape = (pictrue_size,pictrue_size) im = Image.new("RGB", pictrue_shape, background_color) dr = ImageDraw.Draw(im) # 由於系統內部不是使用漢字檔名,而是英文名,在此轉換 if font_name == "宋體": font_name = "SIMSUN.ttc" if font_name == "楷體": font_name = "SIMKAI.ttf" if font_name == "黑體": 
font_name = "SIMHEI.ttf" if font_name == "等線": font_name = "DENG.ttf" if font_name == "仿宋": font_name = "SIMFANG.ttf" # 取得字型檔案的位置 font_dir = "C:\Windows\Fonts\\" + font_name font_size = int(pictrue_size * 0.8 / len(text)) # 設定文字的大小 font = ImageFont.truetype(font_dir, font_size) # 開始繪圖 # 如果設定了居中,那麼就居中 # 英文字母的對齊方式並不一樣 char_dict = [] for i in range(26): char_dict.append(chr(i + ord('a'))) char_dict.append(chr(i + ord('A'))) if in_meddium: char_num = len(text) text_position = (pictrue_shape[0]/2 - char_num*font_size/2, pictrue_shape[1]/2 - font_size/2) # 中文 if text in char_dict: text_position = (pictrue_shape[0] / 2 - char_num*font_size/4, pictrue_shape[1] / 2 - font_size / 2) # 英文 # 開始繪製圖像 dr.text(text_position, text, font=font, fill=text_color) if reverse_color: im = ImageOps.invert(im) # 隨機擾動 if noise > 0: print("adding noise...") im_array = np.array(im) noise_num = noise * pictrue_size for i in range(noise_num): pos = (random.randint(0,pictrue_size-1), random.randint(0,pictrue_size-1)) color = [random.randint(0,255), random.randint(0,255), random.randint(0,255)] im_array[pos[0],pos[1],:] = color im = Image.fromarray(im_array) # 模糊化圖片 ''' for i in range(smooth_times): im =im.filter(ImageFilter.GaussianBlur) ''' im_array = np.array(im) for i in range(smooth_times): im_array = smooth(im_array) im = Image.fromarray(im_array) # 圖片經過模糊後略有縮小 im = im.resize(pictrue_shape) print("文字轉換圖片成功") return im #第一張風格圖 style_reference_image_path="Style_Migration_For_Artistic_Font_With_CNN/style/fire/2.jpg" #第二張風格圖 style_reference_image2_path="Style_Migration_For_Artistic_Font_With_CNN/style/fire/1.jpg" #文字,支援多字 chars='九日王朝' #尺寸 pictrue_size =300 #背景顏色 background_color=(0,0,0) #文字顏色 text_color=(255,255,255) #迭代次數 iterations=50 #模糊處理 smooth_times=20 #隨機噪聲 noise=False #色度亮度銳度增強 image_enhance=True #字型 font_name = '楷體' #黑紙白字 reverse_color =False #風格圖一張&兩張 image_input_mode ="two_pic"#image_input_mode ="one_pic"#image_input_mode ="one_picT" #第一張圖的相對權重 two_style_k=0.5 #輸出目錄 
result_prefix='Style_Migration_For_Artistic_Font_With_CNN/output/' # 生成輸入圖片 char_image = char_to_picture(chars,font_name=font_name,background_color=background_color,text_color=text_color, pictrue_size=pictrue_size,in_meddium=True,reverse_color=reverse_color, smooth_times=smooth_times,noise=noise) width, height = char_image.size # 風格損失的權重沒有意義,因為對於一張文字圖片來說,不可能有沒有內容損失 style_weight = 1.0 # util function to resize and format pictures into appropriate tensors def preprocess_image(image): """ 預處理圖片,包括變形到(1,width, height)形狀,資料歸一到0-1之間 :param image: 輸入一張圖片 :return: 預處理好的圖片 """ image = image.resize((width, height)) image = img_to_array(image) image = np.expand_dims(image, axis=0) # (width, height)->(1,width, height) image = vgg19.preprocess_input(image) # 0-255 -> 0-1.0 return image def deprocess_image(x): """ 將0-1之間的資料變成圖片的形式返回 :param x: 資料在0-1之間的矩陣 :return: 圖片,資料都在0-255之間 """ x = x.reshape((width, height, 3)) x[:, :, 0] += 103.939 x[:, :, 1] += 116.779 x[:, :, 2] += 123.68 # 'BGR'->'RGB' x = x[:, :, ::-1] x = np.clip(x, 0, 255).astype('uint8') # 以防溢位255範圍 return x # 得到需要處理的資料,處理為keras的變數(tensor),處理為一個(5, width, height, 3)的矩陣 # 分別是文字圖片,風格圖片1,風格圖片1T, 風格圖片2,結果圖片 base_image = K.variable(preprocess_image(char_image)) style_reference_image1 = K.variable(preprocess_image(load_img(style_reference_image_path))) style_reference_image1_T = K.variable(preprocess_image(load_img(style_reference_image_path).transpose(Image.ROTATE_90))) try: style_reference_image2 = K.variable(preprocess_image(load_img(style_reference_image2_path))) except: # 不會用到這個了 if image_input_mode == "two_pic": print("尚未找到第二張圖片,或許您忘記輸入了,請輸入--style_reference_image2_path 第二張圖片的位置") style_reference_image2 = K.variable(preprocess_image(load_img(style_reference_image_path))) combination_image = K.placeholder((1, width, height, 3)) input_tensor = K.concatenate([base_image, style_reference_image1, style_reference_image1_T, style_reference_image2, combination_image], axis=0) # 結合以上5張圖片,作為輸入向量 # 使用Keras提供的訓練好的Vgg19網路 model = 
vgg19.VGG19(input_tensor=input_tensor,weights='imagenet', include_top=False) model.summary() # Vgg19網路中的不同的名字,儲存起來以備使用 outputs_dict = dict([(layer.name, layer.output) for layer in model.layers]) def gram_matrix(x): # Gram矩陣 assert K.ndim(x) == 3 if K.image_data_format() == 'channels_first': features = K.batch_flatten(x) else: features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1))) gram = K.dot(features, K.transpose(features)) return gram # 風格損失,是風格圖片與結果圖片的Gram矩陣之差,並對所有元素求和 def style_loss(style, combination): assert K.ndim(style) == 3 assert K.ndim(combination) == 3 S = gram_matrix(style) C = gram_matrix(combination) S_C = S-C channels = 3 size = height * width return K.sum(K.square(S_C)) / (4. * (channels ** 2) * (size ** 2)) #return K.sum(K.pow(S_C,4)) / (4. * (channels ** 2) * (size ** 2)) # 居然和平方沒有什麼不同 #return K.sum(K.pow(S_C,4)+K.pow(S_C,2)) / (4. * (channels ** 2) * (size ** 2)) # 也能用,花後面出現了葉子 loss = K.variable(0.) # 計算風格損失,糅合多個特徵層的資料,取平均 # [ A, B, C, D, E, F ] # feature_layers = ['block1_conv1', 'block2_conv1','block3_conv1', 'block4_conv1','block5_conv1','block5_conv4'] # A全是顏色,沒有紋理---------------------------------------------------->F全是紋理,沒有顏色 feature_layers = ['block1_conv1','block2_conv1','block3_conv1'] feature_layers_w = [10.0,1.0,1.0] for i in range(len(feature_layers)): # 每一層的權重以及資料 layer_name, w = feature_layers[i], feature_layers_w[i] layer_features = outputs_dict[layer_name] style_reference_features1 = layer_features[1, :, :, :] combination_features = layer_features[4, :, :, :] if image_input_mode == "one_pic": style_reference_features_mix = style_reference_features1 elif image_input_mode == "one_pic_T": style_reference_features1_T = layer_features[2, :, :, :] style_reference_features_mix = 0.5 * (style_reference_features1 + style_reference_features1_T) #style_reference_features_mix = K.maximum(style_reference_features1, style_reference_features1_T) else: # image_input_mode == "two_pic" style_reference_features2 = layer_features[3, :, :, :] 
k = two_style_k style_reference_features_mix = style_reference_features1 * k + style_reference_features2 * (1-k) loss += w * style_loss(style_reference_features_mix, combination_features) # 求得梯度,輸入combination_image,對loss求梯度 grads = K.gradients(loss, combination_image) outputs = [loss] if isinstance(grads, (list, tuple)): outputs += grads else: outputs.append(grads) f_outputs = K.function([combination_image], outputs) def eval_loss_and_grads(x): # 輸入x,輸出對應於x的梯度和loss if K.image_data_format() == 'channels_first': x = x.reshape((1, 3, height, width)) else: x = x.reshape((1, height, width, 3)) outs = f_outputs([x]) # 輸入x,得到輸出 loss_value = outs[0] if len(outs[1:]) == 1: grad_values = outs[1].flatten().astype('float64') else: grad_values = np.array(outs[1:]).flatten().astype('float64') return loss_value, grad_values # Evaluator可以只需要進行一次計算就能得到所有的梯度和loss class Evaluator(object): def __init__(self): self.loss_value = None self.grads_values = None def loss(self, x): assert self.loss_value is None loss_value, grad_values = eval_loss_and_grads(x) self.loss_value = loss_value self.grad_values = grad_values return self.loss_value def grads(self, x): assert self.loss_value is not None grad_values = np.copy(self.grad_values) self.loss_value = None self.grad_values = None return grad_values evaluator = Evaluator() x = preprocess_image(char_image) img = deprocess_image(x.copy()) fname = result_prefix + chars + '_原始圖片.png' save_img(fname, img) # 開始迭代 for i in range(iterations): start_time = time.time() print('代數', i,end=" ") x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(), fprime=evaluator.grads, maxfun=20, epsilon=1e-7) # 一個scipy的L-BFGS優化器 print('目前loss:', min_val,end=" ") # 儲存生成的圖片 img = deprocess_image(x.copy()) fname = result_prefix + chars + '_代數_%d.png' % i end_time = time.time() print('耗時%.2f s' % (end_time - start_time)) if i%5 == 0 or i == iterations-1: save_img(fname, img, image_enhance=image_enhance) print('檔案儲存為', fname)
測試截圖:
原文提供的輸出:
自己訓練的輸出:
……醜的一比
嗯嗯,果然不是親生的養不熟o(︶︿︶)o