Implementing the Paper "Multi-Perspective Sentence Similarity Modeling with Convolutional Neural Networks": Network Construction and Training

Environment:

Python 3.6

TensorFlow-GPU 1.8.0

The network model implemented here is built on top of https://blog.csdn.net/liuchonge/article/details/64440110. The differences are that, to cope with the loss becoming NaN, a batch-normalization (BN) layer is added after every convolution layer, and comU1 computes only the cosine and L1 distances while comU2 computes only the cosine distance.
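The "BN after every convolution" change might look roughly like the following sketch (illustrative only; the layer names and the choice of activation are assumptions, not the referenced repo's exact code):

def conv_bn(inputs, num_filters, filter_size, embedding_size, is_training, scope):
    # inputs: [batch, sentence_length, embedding_size, 1]
    with tf.variable_scope(scope):
        conv = tf.layers.conv2d(inputs, filters=num_filters,
                                kernel_size=[filter_size, embedding_size],
                                activation=None, name='conv')
        # Batch normalization right after the convolution, before the
        # non-linearity, to keep activations well-scaled and avoid NaN losses
        bn = tf.layers.batch_normalization(conv, training=is_training, name='bn')
        return tf.nn.tanh(bn)

Note that tf.layers.batch_normalization registers its moving-average updates in tf.GraphKeys.UPDATE_OPS, so those update ops must be run alongside the train op for the BN statistics to be maintained.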

Given that, this post lists only the code of the training file. For reference, the two modified comparison units are small enough to sketch right here.
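A minimal, illustrative version of the comparison units (assuming the pooled feature vectors arrive as [batch, dim] tensors; the names follow the referenced repo, but the exact signatures there may differ):

def cosine_distance(x, y):
    # x, y: [batch, dim] -> [batch, 1]
    x_norm = tf.nn.l2_normalize(x, axis=1)
    y_norm = tf.nn.l2_normalize(y, axis=1)
    return tf.reduce_sum(x_norm * y_norm, axis=1, keepdims=True)

def l1_distance(x, y):
    return tf.reduce_sum(tf.abs(x - y), axis=1, keepdims=True)

def comU1(x, y):
    # Only cosine and L1, as described above
    return tf.concat([cosine_distance(x, y), l1_distance(x, y)], axis=1)

def comU2(x, y):
    # Only cosine
    return cosine_distance(x, y)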

First, we configure the GPU and set the parameters the model uses, as shown below:

# Imports needed by the training script (module names for the local helpers
# follow the referenced repo and may differ in your project)
import os
import time
import datetime

import tensorflow as tf
from sklearn.utils import shuffle

import emb          # GloVe loader
import data_helper  # data loading and batching utilities
from model import MPCNN  # the MPCNN model class

# GPU setup
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ['CUDA_VISIBLE_DEVICES'] = "0"


"""各項引數設定"""
#網路引數
tf.flags.DEFINE_integer('sentence_length', 100, 'The length of sentence')
tf.flags.DEFINE_integer('embedding_size', 50, 'The dimension of the word embedding')
tf.flags.DEFINE_integer('num_filters_A', 20, 'The number of filters in block A')
tf.flags.DEFINE_integer('num_filters_B', 20, 'The number of filters in block B')
tf.flags.DEFINE_string('filter_sizes', '1,2,100', 'The size of filter')
tf.flags.DEFINE_integer('num_classes', 6, 'The number of lables')
tf.flags.DEFINE_integer('n_hidden', 150, 'The number of hidden units in the fully connected layer')
tf.flags.DEFINE_float('dropout_keep_prob', 0.5, 'The proability of dropout')
#訓練引數
tf.flags.DEFINE_integer('num_epochs', 10, 'The number of epochs to be trained')
tf.flags.DEFINE_integer('batch_size', 32, 'The size of mini batch')
tf.flags.DEFINE_integer('evaluate_every', 100, 'Evaluate model on dev set after this many steps(default:100)')
tf.flags.DEFINE_integer('checkpoint_every', 100, 'Save model after this many steps(default;100)')
tf.flags.DEFINE_integer('num_checkpoints', 5, 'The number of checkpoints to store(default:5)')
#L2正則項
tf.flags.DEFINE_float('lr', 1e-3, 'The learning rate of this model')
tf.flags.DEFINE_float('l2_reg_lambda', 1e-4, 'The regulatization parameter')
#裝置引數
tf.flags.DEFINE_boolean('allow_soft_placement', True, 'Allow device soft device placement')
tf.app.flags.DEFINE_boolean('log_device_placement', False, 'Log placement of ops on devices')
"""各項引數設定"""

FLAGS = tf.flags.FLAGS
print('\nParameters:')
# flag_values_dict() returns plain {name: value} pairs; iterating FLAGS.__flags
# directly would print Flag objects in TF 1.8 rather than the values
for attr, value in sorted(FLAGS.flag_values_dict().items()):
    print('{}={}'.format(attr.upper(), value))
print('')

With the parameters set, we load the data:

# glove holds the loaded word vectors: glove.d is the word-index dictionary
# <word, index>, and glove.g is the embedding matrix <vocab size, 50>
print('loading glove...')
glove = emb.GloVe(N=50)
print('============== GloVe model loaded! ===================')

print("Loading data...")
Xtrain, ytrain = data_helper.load_set(glove, path='./sts/semeval-sts/all')
Xtrain[0], Xtrain[1], ytrain = shuffle(Xtrain[0], Xtrain[1], ytrain)
# shape: [22592, sentence length]
Xtest, ytest = data_helper.load_set(glove, path='./sts/semeval-sts/2016')
Xtest[0], Xtest[1], ytest = shuffle(Xtest[0], Xtest[1], ytest)
# shape: [1186, sentence length]
print('============== Data loaded! ===================')

For robustness, it is advisable to wrap the GloVe loading in a helper of its own, as sketched below.
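A possible wrapper (illustrative only; emb.GloVe and the dimension follow the code above, while the error handling is an assumption):

def load_glove(dim=50):
    # Fail with a clear error if the vector file is missing or unreadable,
    # instead of crashing later in the pipeline
    try:
        print('loading glove...')
        glove = emb.GloVe(N=dim)
        print('============== GloVe model loaded! ===================')
        return glove
    except (IOError, OSError) as e:
        raise RuntimeError('Failed to load GloVe vectors (dim={}): {}'.format(dim, e))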

Next, we write the code that trains the model:

"""Start the MPCNN model"""
with tf.Graph().as_default():
    session_config = tf.ConfigProto(allow_soft_placement = FLAGS.allow_soft_placement,
                                    log_device_placement = FLAGS.log_device_placement)
    session_config.gpu_options.allow_growth = True
    session = tf.Session(config=session_config)
    with session.as_default():
        """定義輸入輸出等placeholder"""
        input_1 = tf.placeholder(tf.int32, shape=[None, FLAGS.sentence_length], name='input_x1')
        input_2 = tf.placeholder(tf.int32, shape=[None, FLAGS.sentence_length], name='input_x2')
        input_3 = tf.placeholder(tf.float32, shape=[None, FLAGS.num_classes], name='input_y')
        dropout_keep_prob = tf.placeholder(tf.float32, name='dropout_keep_prob')
        print('佔位符構建完畢!')

        with tf.device('/cpu:0'), tf.name_scope('embedding'):
            # The looked-up tensors are 3-D: [batch, sentence_length, embedding_size]
            s0 = tf.nn.embedding_lookup(glove.g, input_1)
            s1 = tf.nn.embedding_lookup(glove.g, input_2)
            print('Embedding lookup done!')

        with tf.name_scope('reshape'):
            # Expand the embedded inputs to the 4-D tensors the conv layers
            # expect (equivalently: tf.expand_dims(s0, -1))
            input_x1 = tf.reshape(s0, [-1, FLAGS.sentence_length, FLAGS.embedding_size, 1])
            input_x2 = tf.reshape(s1, [-1, FLAGS.sentence_length, FLAGS.embedding_size, 1])
            input_y = tf.reshape(input_3, [-1, FLAGS.num_classes])
        print('Reshape done!')

        # Build the MPCNN model. Note: pass the dropout placeholder (not the
        # FLAGS constant), otherwise the keep probabilities fed in train() and
        # dev_step() below would have no effect.
        model = MPCNN(num_classes=FLAGS.num_classes, embedding_size=FLAGS.embedding_size,
                      filter_sizes=[int(size) for size in FLAGS.filter_sizes.split(',')],
                      num_filters=[FLAGS.num_filters_A, FLAGS.num_filters_B],
                      n_hidden=FLAGS.n_hidden,
                      input_x1=input_x1,
                      input_x2=input_x2,
                      input_y=input_y,
                      dropout_keep_prob=dropout_keep_prob,
                      l2_reg_lambda=FLAGS.l2_reg_lambda)
        print('MPCNN model built!')


        global_step = tf.Variable(0, name='global_step', trainable=False)
        # Build the similarity-scoring part of the graph
        print('================ Computing similarity scores ====================')
        model.similarity_measure_layer()
        print('=============== Similarity graph built ========================')
        optimizer = tf.train.AdamOptimizer(FLAGS.lr)
        grads_and_vars = optimizer.compute_gradients(model.loss)
        train_step = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
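        # Note (not in the original post): gradient clipping is another common
        # guard against exploding/NaN losses; if BN alone is not enough, the
        # gradients could be clipped before the apply_gradients call above, e.g.:
        # grads_and_vars = [(tf.clip_by_norm(g, 5.0), v)
        #                   for g, v in grads_and_vars if g is not None]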

        # Output directory for summaries (and, optionally, checkpoints)
        timestamp = str(int(time.time()))
        out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
        print("Writing to {}\n".format(out_dir))

        loss_summary = tf.summary.scalar("loss", model.loss)
        acc_summary = tf.summary.scalar("accuracy", model.accuracy)

        train_summary_op = tf.summary.merge([loss_summary, acc_summary])
        train_summary_dir = os.path.join(out_dir, "summaries", "train")
        train_summary_writer = tf.summary.FileWriter(train_summary_dir, session.graph)

        dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
        dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
        dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, session.graph)

        # Optional checkpointing (uncomment to enable):
        # checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
        # checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        # if not os.path.exists(checkpoint_dir):
        #     os.makedirs(checkpoint_dir)
        # saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

        def train(x1_batch, x2_batch, y_batch):
            """
            A single training step
            """
            feed_dict = {
                input_1: x1_batch,
                input_2: x2_batch,
                input_3: y_batch,
                # use the configured keep probability rather than a hard-coded 0.5
                dropout_keep_prob: FLAGS.dropout_keep_prob
            }
            _, step, summaries, batch_loss, accuracy = session.run(
                [train_step, global_step, train_summary_op, model.loss, model.accuracy],
                feed_dict)
            time_str = datetime.datetime.now().isoformat()
            print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, batch_loss, accuracy))
            train_summary_writer.add_summary(summaries, step)


        def dev_step(x1_batch, x2_batch, y_batch):
            """
            Evaluates the model on a dev batch. Note: train_step is NOT run
            here, otherwise evaluation would update the weights on the dev set.
            """
            feed_dict = {
                input_1: x1_batch,
                input_2: x2_batch,
                input_3: y_batch,
                dropout_keep_prob: 1.0
            }
            step, summaries, batch_loss, accuracy = session.run(
                [global_step, dev_summary_op, model.loss, model.accuracy],
                feed_dict)
            dev_summary_writer.add_summary(summaries, step)

            return batch_loss, accuracy


        session.run(tf.global_variables_initializer())
        print('Model parameters initialized!')
        print('Generating batches...')
        batches = data_helper.batch_iter(list(zip(Xtrain[0], Xtrain[1], ytrain)), FLAGS.batch_size, FLAGS.num_epochs)
        print('Batches generated!')
        print('Start Training......')
        for batch in batches:
            x1_batch, x2_batch, y_batch = zip(*batch)
            train(x1_batch, x2_batch, y_batch)
            current_step = tf.train.global_step(session, global_step)
            if current_step % FLAGS.evaluate_every == 0:
                total_dev_loss = 0.0
                total_dev_accuracy = 0.0
                num_dev_batches = 0

                print("\nEvaluation:")
                dev_batches = data_helper.batch_iter(list(zip(Xtest[0], Xtest[1], ytest)), FLAGS.batch_size, 1)
                for dev_batch in dev_batches:
                    x1_dev_batch, x2_dev_batch, y_dev_batch = zip(*dev_batch)
                    dev_loss, dev_accuracy = dev_step(x1_dev_batch, x2_dev_batch, y_dev_batch)
                    total_dev_loss += dev_loss
                    total_dev_accuracy += dev_accuracy
                    num_dev_batches += 1
                # Average over the actual number of dev batches
                # (the last batch may be smaller than batch_size)
                total_dev_loss /= num_dev_batches
                total_dev_accuracy /= num_dev_batches
                print("dev_loss {:g}, dev_acc {:g}, num_dev_batches {:g}".format(total_dev_loss, total_dev_accuracy,
                                                                                 num_dev_batches))

        print("Optimization Finished!")

With that, the whole training script is complete!
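One helper the script relies on, data_helper.batch_iter, is not shown in this post. A minimal sketch of such a generator (an assumption, modeled on the usual shuffle-per-epoch batching in TextCNN-style tutorials) could look like:

import numpy as np

def batch_iter(data, batch_size, num_epochs, shuffle=True):
    # Yield mini-batches of `data` for `num_epochs` epochs,
    # reshuffling the data at the start of each epoch
    data = np.array(data)
    data_size = len(data)
    num_batches = (data_size - 1) // batch_size + 1
    for _ in range(num_epochs):
        shuffled = data[np.random.permutation(data_size)] if shuffle else data
        for i in range(num_batches):
            start = i * batch_size
            end = min(start + batch_size, data_size)
            yield shuffled[start:end]

Each yielded batch is then unpacked with zip(*batch) in the training loop above.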

After running it, the results are as follows: