Building and Training the Network Model from the Paper "Multi-Perspective Sentence Similarity Modeling with Convolutional Neural Networks"
阿新 • Published: 2018-11-21
Environment:
Python 3.6
TensorFlow-GPU 1.8.0
The network model implemented here is built on top of https://blog.csdn.net/liuchonge/article/details/64440110. The differences: to handle the loss becoming NaN, a batch-normalization (BN) layer is added after every convolutional layer; comU1 computes only the cosine distance and L1 distance; and comU2 computes only the cosine distance.
Accordingly, this post lists only the code of the training file; a sketch of the model-side changes is given below.
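Since the model file itself is not listed, here is a minimal, hypothetical sketch of the two changes described above: the convolution-plus-BN pattern and the trimmed-down comU1/comU2 comparison units. The function names and signatures are illustrative only, not the author's actual model code:

import tensorflow as tf

def conv_bn(x, num_filters, filter_size, embedding_size, is_training, scope):
    """Convolution followed by batch normalization, the pattern used here to avoid NaN loss."""
    with tf.variable_scope(scope):
        conv = tf.layers.conv2d(x, filters=num_filters,
                                kernel_size=[filter_size, embedding_size],
                                activation=None)
        bn = tf.layers.batch_normalization(conv, training=is_training)
        return tf.nn.tanh(bn)

def comU1(x, y):
    """Only cosine distance and L1 distance (x, y have shape [batch, dim])."""
    cos = tf.reduce_sum(x * y, axis=1) / (tf.norm(x, axis=1) * tf.norm(y, axis=1) + 1e-8)
    l1 = tf.reduce_sum(tf.abs(x - y), axis=1)
    return tf.stack([cos, l1], axis=1)

def comU2(x, y):
    """Only cosine distance."""
    cos = tf.reduce_sum(x * y, axis=1) / (tf.norm(x, axis=1) * tf.norm(y, axis=1) + 1e-8)
    return tf.expand_dims(cos, 1)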
First, we configure the GPU and define the model's hyperparameters, as shown below:
# Imports needed by the training file (data_helper and emb are the author's own
# modules; the MPCNN class comes from the companion model file, not listed here)
import os
import time
import datetime

import tensorflow as tf
from sklearn.utils import shuffle  # assumed source of shuffle(); the call signature below matches it

import data_helper
import emb

# GPU settings
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ['CUDA_VISIBLE_DEVICES'] = "0"

"""Parameter settings"""
# Network parameters
tf.flags.DEFINE_integer('sentence_length', 100, 'The length of sentence')
tf.flags.DEFINE_integer('embedding_size', 50, 'The dimension of the word embedding')
tf.flags.DEFINE_integer('num_filters_A', 20, 'The number of filters in block A')
tf.flags.DEFINE_integer('num_filters_B', 20, 'The number of filters in block B')
tf.flags.DEFINE_string('filter_sizes', '1,2,100', 'The size of filter')
tf.flags.DEFINE_integer('num_classes', 6, 'The number of labels')
tf.flags.DEFINE_integer('n_hidden', 150, 'The number of hidden units in the fully connected layer')
tf.flags.DEFINE_float('dropout_keep_prob', 0.5, 'The probability of dropout')
# Training parameters
tf.flags.DEFINE_integer('num_epochs', 10, 'The number of epochs to be trained')
tf.flags.DEFINE_integer('batch_size', 32, 'The size of mini batch')
tf.flags.DEFINE_integer('evaluate_every', 100, 'Evaluate model on dev set after this many steps (default: 100)')
tf.flags.DEFINE_integer('checkpoint_every', 100, 'Save model after this many steps (default: 100)')
tf.flags.DEFINE_integer('num_checkpoints', 5, 'The number of checkpoints to store (default: 5)')
# Learning rate and L2 regularization
tf.flags.DEFINE_float('lr', 1e-3, 'The learning rate of this model')
tf.flags.DEFINE_float('l2_reg_lambda', 1e-4, 'The regularization parameter')
# Device parameters
tf.flags.DEFINE_boolean('allow_soft_placement', True, 'Allow soft device placement')
tf.flags.DEFINE_boolean('log_device_placement', False, 'Log placement of ops on devices')

FLAGS = tf.flags.FLAGS
print('\nParameters:')
for attr, value in sorted(FLAGS.flag_values_dict().items()):
    print('{}={}'.format(attr.upper(), value))
print('')
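Because the parameters are defined through tf.flags, any of them can be overridden on the command line instead of editing the source. Assuming the training file is saved as train.py (a hypothetical filename), for example:

python train.py --batch_size=64 --lr=5e-4 --num_epochs=20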
After setting the parameters, we load the data:
# glove is the loaded word-embedding model: glove.d is the word-to-index dictionary
# <word, index>, and glove.g is the embedding matrix <vocabulary size, N=50>
print('loading glove...')
glove = emb.GloVe(N=50)
print('============== GloVe model loaded! ===================')

print("Loading data...")
Xtrain, ytrain = data_helper.load_set(glove, path='./sts/semeval-sts/all')
Xtrain[0], Xtrain[1], ytrain = shuffle(Xtrain[0], Xtrain[1], ytrain)  # [22592, sentence length]
Xtest, ytest = data_helper.load_set(glove, path='./sts/semeval-sts/2016')
Xtest[0], Xtest[1], ytest = shuffle(Xtest[0], Xtest[1], ytest)  # [1186, sentence length]
print('============== Data loaded! ===================')
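Note that the labels here are float vectors over num_classes = 6 bins rather than scalar scores. A common scheme for STS-style data (e.g. Tai et al., 2015) converts a real-valued similarity score y in [0, 5] into a sparse target distribution; whether data_helper.load_set uses exactly this mapping is an assumption, but a sketch looks like this:

import numpy as np

def score_to_dist(score, num_classes=6):
    """Hypothetical mapping of a similarity score in [0, 5] to a sparse
    probability distribution over num_classes bins."""
    dist = np.zeros(num_classes, dtype=np.float32)
    floor = int(np.floor(score))
    if floor == score:
        dist[floor] = 1.0  # integer scores map to a one-hot vector
    else:
        dist[floor] = floor + 1 - score   # weight on the lower bin
        dist[floor + 1] = score - floor   # weight on the upper bin
    return dist

# e.g. score_to_dist(2.4) -> [0, 0, 0.6, 0.4, 0, 0]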
It is recommended to wrap the GloVe-loading step in its own function for better code robustness.
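A minimal sketch of such a wrapper, reusing the emb.GloVe and data_helper.load_set interfaces shown above (the function name load_data is illustrative):

def load_data(glove_dim=50,
              train_path='./sts/semeval-sts/all',
              test_path='./sts/semeval-sts/2016'):
    """Load GloVe vectors and the train/test sets in one place, so failures
    surface with a clear message instead of an error much later on."""
    try:
        glove = emb.GloVe(N=glove_dim)
    except (IOError, OSError) as e:
        raise RuntimeError('Failed to load GloVe vectors: {}'.format(e))
    Xtrain, ytrain = data_helper.load_set(glove, path=train_path)
    Xtrain[0], Xtrain[1], ytrain = shuffle(Xtrain[0], Xtrain[1], ytrain)
    Xtest, ytest = data_helper.load_set(glove, path=test_path)
    Xtest[0], Xtest[1], ytest = shuffle(Xtest[0], Xtest[1], ytest)
    return glove, (Xtrain, ytrain), (Xtest, ytest)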
Next, we write the code that trains the model:
"""Start the MPCNN model""" with tf.Graph().as_default(): session_config = tf.ConfigProto(allow_soft_placement = FLAGS.allow_soft_placement, log_device_placement = FLAGS.log_device_placement) session_config.gpu_options.allow_growth = True session = tf.Session(config=session_config) with session.as_default(): """定義輸入輸出等placeholder""" input_1 = tf.placeholder(tf.int32, shape=[None, FLAGS.sentence_length], name='input_x1') input_2 = tf.placeholder(tf.int32, shape=[None, FLAGS.sentence_length], name='input_x2') input_3 = tf.placeholder(tf.float32, shape=[None, FLAGS.num_classes], name='input_y') dropout_keep_prob = tf.placeholder(tf.float32, name='dropout_keep_prob') print('佔位符構建完畢!') with tf.device('/cpu:0'), tf.name_scope('embedding'): s0 = tf.nn.embedding_lookup(glove.g, input_1) #此時輸入變數的shape為3維 s1 = tf.nn.embedding_lookup(glove.g, input_2) print('embedding轉換完畢!') with tf.name_scope('reshape'): # input_x1 = tf.expand_dims(s0, -1) #將輸入變數轉換為符合的Tensor4維變數 # input_x2 = tf.expand_dims(s1, -1) input_x1 = tf.reshape(s0, [-1, FLAGS.sentence_length, FLAGS.embedding_size, 1]) input_x2 = tf.reshape(s1, [-1, FLAGS.sentence_length, FLAGS.embedding_size, 1]) input_y = tf.reshape(input_3, [-1, FLAGS.num_classes]) print('reshape完畢!') #構建MPCNN模型 model = MPCNN(num_classes=FLAGS.num_classes, embedding_size=FLAGS.embedding_size, filter_sizes=[int(size) for size in FLAGS.filter_sizes.split(',')], num_filters=[FLAGS.num_filters_A, FLAGS.num_filters_B], n_hidden = FLAGS.n_hidden, input_x1=input_x1, input_x2=input_x2, input_y=input_y, dropout_keep_prob=FLAGS.dropout_keep_prob, l2_reg_lambda = FLAGS.l2_reg_lambda) print('MPCNN模型構建完畢!') global_step = tf.Variable(0, name='global_step', trainable=False) # 獲得模型輸出 print('================模型計算相似性得分====================') model.similarity_measure_layer() print('===============模型計算完畢========================') optimizer = tf.train.AdamOptimizer(FLAGS.lr) grads_and_vars = optimizer.compute_gradients(model.loss) train_step = optimizer.apply_gradients(grads_and_vars, global_step=global_step) timestamp = str(int(time.time())) out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp)) # print("Writing to {}\n".format(out_dir)) # loss_summary = tf.summary.scalar("loss", model.loss) acc_summary = tf.summary.scalar("accuracy", model.accuracy) # train_summary_op = tf.summary.merge([loss_summary, acc_summary]) train_summary_dir = os.path.join(out_dir, "summaries", "train") train_summary_writer = tf.summary.FileWriter(train_summary_dir, session.graph) # dev_summary_op = tf.summary.merge([loss_summary, acc_summary]) dev_summary_dir = os.path.join(out_dir, "summaries", "dev") dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, session.graph) # # checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints")) # checkpoint_prefix = os.path.join(checkpoint_dir, "model") # if not os.path.exists(checkpoint_dir): # os.makedirs(checkpoint_dir) # saver = tf.train.Saver(tf.global_variables(), max_to_keep=conf.num_checkpoints) def train(x1_batch, x2_batch, y_batch): """ A single training step """ feed_dict = { input_1: x1_batch, input_2: x2_batch, input_3: y_batch, dropout_keep_prob: 0.5 } _, step, summaries, batch_loss, accuracy = session.run( [train_step, global_step, train_summary_op, model.loss, model.accuracy], feed_dict) time_str = datetime.datetime.now().isoformat() print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, batch_loss, accuracy)) train_summary_writer.add_summary(summaries, step) def dev_step(x1_batch, x2_batch, y_batch, writer=None): """ 
Evaluates model on a dev set """ feed_dict = { input_1: x1_batch, input_2: x2_batch, input_3: y_batch, dropout_keep_prob: 1 } _, step, summaries, batch_loss, accuracy = session.run( [train_step, global_step, dev_summary_op, model.loss, model.accuracy], feed_dict) time_str = datetime.datetime.now().isoformat() dev_summary_writer.add_summary(summaries, step) # if writer: # writer.add_summary(summaries, step) return batch_loss, accuracy session.run(tf.global_variables_initializer()) print('模型引數初始化完畢!') print('生成batch') batches = data_helper.batch_iter(list(zip(Xtrain[0], Xtrain[1], ytrain)), FLAGS.batch_size, FLAGS.num_epochs) print('batch生成完畢!') print('Start Training......') for batch in batches: x1_batch, x2_batch, y_batch = zip(*batch) train(x1_batch, x2_batch, y_batch) current_step = tf.train.global_step(session, global_step) if current_step % FLAGS.evaluate_every == 0: total_dev_loss = 0.0 total_dev_accuracy = 0.0 print("\nEvaluation:") dev_batches = data_helper.batch_iter(list(zip(Xtest[0], Xtest[1], ytest)), FLAGS.batch_size, 1) for dev_batch in dev_batches: x1_dev_batch, x2_dev_batch, y_dev_batch = zip(*dev_batch) dev_loss, dev_accuracy = dev_step(x1_dev_batch, x2_dev_batch, y_dev_batch) total_dev_loss += dev_loss total_dev_accuracy += dev_accuracy total_dev_accuracy = total_dev_accuracy / (len(ytest) / FLAGS.batch_size) print("dev_loss {:g}, dev_acc {:g}, num_dev_batches {:g}".format(total_dev_loss, total_dev_accuracy, len(ytest) / FLAGS.batch_size)) print("Optimization Finished!")
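One caveat about the BN layers added earlier: if the model file implements them with tf.layers.batch_normalization, the moving-average update ops live in tf.GraphKeys.UPDATE_OPS and are not run automatically. In that case the training op above should be wrapped in a control dependency, roughly like this (a sketch, assuming model.loss as defined above):

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    optimizer = tf.train.AdamOptimizer(FLAGS.lr)
    grads_and_vars = optimizer.compute_gradients(model.loss)
    train_step = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

BN also needs to know whether it is running in training or inference mode, which is usually fed through a boolean placeholder, analogous to dropout_keep_prob above.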
With this, the entire training file is complete!
After running it, the results are as follows: