命名實體識別之bert+bilstm（基於tensorflow）

阿新 • • 發佈：2020-12-13

接下來我們繼續對官方基於 BERT 的模型進行擴充（在 BERT 的序列輸出之後接上 BiLSTM 層），之前的文章可參考：

直接看程式碼：

class MyModel:
  """BERT encoder followed by a BiLSTM and a two-layer projection (TF 1.x).

  Constructing the object builds the whole graph: placeholders, the BERT
  encoder, the BiLSTM, the projection to per-token tag logits and a plain
  softmax cross-entropy loss (no CRF).

  Attributes set while building: ``logits``, ``probabilities``,
  ``predictions``, ``per_example_loss``, ``loss`` and ``lengths``.
  """

  def __init__(self, config):
    """Create the input placeholders and build the graph.

    :param config: object read for ``lstm_dim``, ``relation_num``,
        ``bert_config_file`` and ``bert_file``.
    """
    self.config = config
    # Placeholders fed into the model.
    self.input_x_word = tf.placeholder(tf.int32, [None, None], name="input_x_word")
    self.input_x_len = tf.placeholder(tf.int32, name='input_x_len')
    self.input_mask = tf.placeholder(tf.int32, [None, None], name='input_mask')
    # Gold NER tag ids, one per token.
    self.input_relation = tf.placeholder(tf.int32, [None, None], name='input_relation')
    # Keep probability applied to the BERT output before the BiLSTM;
    # feed 1.0 at evaluation / prediction time.
    self.keep_prob = tf.placeholder(tf.float32, name='dropout_keep_prob')
    self.is_training = tf.placeholder(tf.bool, None, name='is_training')
    self.initializer = initializers.xavier_initializer()
    self.lstm_dim = self.config.lstm_dim
    self.relation_num = self.config.relation_num
    # Dynamic sequence length (last axis of the token-id input).
    self.num_steps = tf.shape(self.input_x_word)[-1]
    print("self.num_steps.shape[-1]:", tf.shape(self.input_x_word)[-1])
    self.bert_embed(bert_init=True)

  def biLSTM_layer(self, lstm_inputs, lstm_dim, lengths, name=None):
    """Run a bidirectional LSTM over the inputs.

    :param lstm_inputs: [batch_size, num_steps, emb_size]
    :param lstm_dim: number of units of each directional cell
    :param lengths: per-example sequence lengths passed to the dynamic RNN
    :param name: optional name scope (defaults to "char_BiLSTM")
    :return: [batch_size, num_steps, 2*lstm_dim], forward and backward
        outputs concatenated on the last axis
    """
    with tf.name_scope("char_BiLSTM" if not name else name):
      lstm_cell = {}
      for direction in ["forward", "backward"]:
        with tf.name_scope(direction):
          lstm_cell[direction] = rnn.CoupledInputForgetGateLSTMCell(
              lstm_dim,
              use_peepholes=True,
              initializer=self.initializer,
              state_is_tuple=True)
      outputs, final_states = tf.nn.bidirectional_dynamic_rnn(
          lstm_cell["forward"],
          lstm_cell["backward"],
          lstm_inputs,
          dtype=tf.float32,
          sequence_length=lengths)
    return tf.concat(outputs, axis=2)

  def project_layer(self, lstm_outputs, name=None):
    """Hidden tanh layer plus linear layer mapping LSTM outputs to tag logits.

    :param lstm_outputs: [batch_size, num_steps, 2*lstm_dim]
    :param name: optional name scope (defaults to "project")
    :return: [batch_size, num_steps, relation_num]
    """
    # NOTE: tf.name_scope does not prefix variables created with
    # tf.get_variable, so HW/Hb/LW/Lb live in the enclosing variable scope.
    # The names are kept as-is so existing checkpoints stay loadable.
    with tf.name_scope("project" if not name else name):
      with tf.name_scope("hidden"):
        W = tf.get_variable("HW", shape=[self.lstm_dim * 2, self.lstm_dim],
                            dtype=tf.float32, initializer=self.initializer)
        b = tf.get_variable("Hb", shape=[self.lstm_dim], dtype=tf.float32,
                            initializer=tf.zeros_initializer())
        # Flatten batch and time so a single matmul handles every token.
        output = tf.reshape(lstm_outputs, shape=[-1, self.lstm_dim * 2])
        hidden = tf.tanh(tf.nn.xw_plus_b(output, W, b))

      # Project to the score of each tag.
      with tf.name_scope("logits"):
        W = tf.get_variable("LW", shape=[self.lstm_dim, self.relation_num],
                            dtype=tf.float32, initializer=self.initializer)
        b = tf.get_variable("Lb", shape=[self.relation_num], dtype=tf.float32,
                            initializer=tf.zeros_initializer())
        pred = tf.nn.xw_plus_b(hidden, W, b)

      # Restore the [batch, time, tags] layout.
      return tf.reshape(pred, [-1, self.num_steps, self.relation_num], name='pred_logits')

  def loss_without_crf(self, output_layer, num_labels, bert_init=True):
    """Attach softmax cross-entropy loss and optionally load BERT weights.

    Sets ``logits``, ``probabilities``, ``predictions``,
    ``per_example_loss`` and ``loss`` on the instance.

    :param output_layer: per-token logits, [batch_size, num_steps, num_labels]
    :param num_labels: number of tag classes (one-hot depth)
    :param bert_init: when True, initialise matching trainable variables
        from the checkpoint at ``config.bert_file``
    """
    with tf.variable_scope("loss"):
      self.logits = output_layer
      self.probabilities = tf.nn.softmax(self.logits, axis=-1)
      log_probs = tf.nn.log_softmax(self.logits, axis=-1)  # [batch, num_steps, num_labels]
      print("log_probs.shape:", log_probs.shape)

      self.predictions = tf.argmax(self.logits, axis=-1, name="predictions")

      # [batch, num_steps, num_labels]
      one_hot_labels = tf.one_hot(self.input_relation, depth=num_labels, dtype=tf.float32)
      self.per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
      # NOTE(review): the mean runs over every position, padding included;
      # confirm whether padded tokens should be masked out with input_mask.
      self.loss = tf.reduce_mean(self.per_example_loss)
      print(self.loss)

      tvars = tf.trainable_variables()
      init_checkpoint = self.config.bert_file
      initialized_variable_names = {}
      if bert_init:
        # Only touch the checkpoint when we really initialise from it, so
        # the *INIT_FROM_CKPT* tags below reflect what actually happened.
        assignment_map, initialized_variable_names = get_assignment_map_from_checkpoint(
            tvars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
      tf.logging.info("**** Trainable Variables ****")
      for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
          init_string = ", *INIT_FROM_CKPT*"
        print("  name = {}, shape = {}{}".format(var.name, var.shape, init_string))
      if bert_init:
        print('init bert from checkpoint: {}'.format(init_checkpoint))

  def bert_embed(self, bert_init=True):
    """Build BERT -> dropout -> BiLSTM -> projection -> loss.

    :param bert_init: forwarded to ``loss_without_crf``; when True the BERT
        weights are initialised from ``config.bert_file``
    :return: None (results are stored on the instance)
    """
    num_labels = self.config.relation_num
    bert_config_file = self.config.bert_config_file
    bert_config = BertConfig.from_json_file(bert_config_file)

    model = BertModel(
        config=bert_config,
        is_training=self.is_training,  # fine-tuning
        input_ids=self.input_x_word,
        input_mask=self.input_mask,
        token_type_ids=None,
        use_one_hot_embeddings=False)

    # Token-level output is needed for tagging, so use the sequence output
    # rather than model.get_pooled_output().
    # Count non-zero token ids per row as the true length
    # (assumes the padding id is 0 - TODO confirm against the tokenizer).
    used = tf.sign(tf.abs(self.input_x_word))
    length = tf.reduce_sum(used, axis=1)
    self.lengths = tf.cast(length, tf.int32)
    output_layer = model.get_sequence_output()
    # Use the fed keep probability instead of a hard-coded 0.9 so dropout
    # can be switched off (keep_prob=1.0) at inference time.
    lstm_inputs = tf.nn.dropout(output_layer, self.keep_prob)
    output_layer = self.biLSTM_layer(lstm_inputs, self.lstm_dim, self.lengths)
    output_layer = self.project_layer(output_layer)
    print("output_layer.shape:", output_layer.shape)
    self.loss_without_crf(output_layer, num_labels, bert_init=bert_init)

結果：

WARNING:tensorflow:From /content/drive/My Drive/Deep-Learning-With-Python/chapter8/CCF_ner/optimization.py:155: The name tf.train.AdamOptimizer is deprecated. Please use tf.compat.v1.train.AdamOptimizer instead.

WARNING:tensorflow:From /content/drive/My Drive/Deep-Learning-With-Python/chapter8/CCF_ner/bert/tokenization.py:125: The name tf.gfile.GFile is deprecated. Please use tf.io.gfile.GFile instead.

{'錁', '蚨', '螭', '荑', 'Q', '芘', '呭', '鏵', '㈠', '靊', '鋮', '氺', '狻', '涜', '鍙', '暍', '閮', '椴', '茀', '鉲', '蒔', 'Z', 'U', '幤', 'X', '瀵', '’', 'F', 'L', '魑', '鸃', '汭', '嗾', '滻', '謖', '媖', '殫', '曁', '佺', '訃', '戔', 'I', '聃', '騸', '劼', '緇', '嶶', '錛', 'T', '椱', '寳', '叒', '燚', '歃', 'G', '騮', 'O', 'V', '笪', '楒', '贇', '`', '轡', '姇', '狴', '慍', '鵒', 'W', 'S', 'N', '煢', '莜', '叕', '鑱', 'K', '犴', '菥', '塱', '捭', 'Y', 'R', '毖', '閞', '郯', '咑', '鎸', '瘜', '劖', '嗮', 'D', '猊', '圇', '旂', '忔', '亓', '”', '邴', 'E', '齬', '檁', 'B', 'J', '鴟', '猻', '犇', '諞', '茆', '旳', '噃', '釒', '祹', '渼', '魎', '瘕', '鐢', '綉', '\ue40a', '瑧', '槊', '翀', '跬', '屓', '疄', '犰', '勮', 'C', '槤', '鐜', '攛', '躂', '釆', '嚚', '銥', '鏰', '戝', '罝', 'P', '亻', '“', '祼', '襤', '睚', '貹', '鋏', '庒', '鍒', '姤', '圪', '浡', '帀', '綯', '齟', '訁', '‘', '猢', '睍', '斲', '屼', 'M', 'A', 'H', '誆', '簰', '雬', '俰', '玎'}
8012
{'桭', 'C', 'T', '歀', 'Q', 'D', 'G', '靊', '烎', 'P', '”', '鉲', '“', 'E', 'O', 'V', '緈', 'Z', 'J', 'B', 'U', 'X', '’', 'F', 'L', '‘', 'W', '尓', 'N', 'S', 'K', '滻', '諨', 'A', 'H', 'Y', 'M', 'R', 'I'}
1105
WARNING:tensorflow:From test_bert.py:388: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.

2020-12-13 14:07:47.209770: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2020-12-13 14:07:47.265991: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-12-13 14:07:47.266613: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Found device 0 with properties: 
name: Tesla T4 major: 7 minor: 5 memoryClockRate(GHz): 1.59
pciBusID: 0000:00:04.0
2020-12-13 14:07:47.266923: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2020-12-13 14:07:47.493085: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2020-12-13 14:07:47.621614: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
2020-12-13 14:07:47.641392: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10
2020-12-13 14:07:47.925153: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10
2020-12-13 14:07:47.943921: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10
2020-12-13 14:07:48.468415: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2020-12-13 14:07:48.468625: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-12-13 14:07:48.469411: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-12-13 14:07:48.470004: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1767] Adding visible gpu devices: 0
2020-12-13 14:07:48.525931: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2200000000 Hz
2020-12-13 14:07:48.526210: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x270ef40 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-12-13 14:07:48.526244: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2020-12-13 14:07:48.677879: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-12-13 14:07:48.678754: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x270f100 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2020-12-13 14:07:48.678790: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
2020-12-13 14:07:48.679588: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-12-13 14:07:48.680198: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Found device 0 with properties: 
name: Tesla T4 major: 7 minor: 5 memoryClockRate(GHz): 1.59
pciBusID: 0000:00:04.0
2020-12-13 14:07:48.680265: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2020-12-13 14:07:48.680295: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2020-12-13 14:07:48.680319: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
2020-12-13 14:07:48.680346: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10
2020-12-13 14:07:48.680371: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10
2020-12-13 14:07:48.680393: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10
2020-12-13 14:07:48.680416: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2020-12-13 14:07:48.680497: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-12-13 14:07:48.681158: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-12-13 14:07:48.681699: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1767] Adding visible gpu devices: 0
2020-12-13 14:07:48.684658: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2020-12-13 14:07:48.686073: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1180] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-12-13 14:07:48.686103: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1186]      0 
2020-12-13 14:07:48.686114: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1199] 0:   N 
2020-12-13 14:07:48.687110: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-12-13 14:07:48.687768: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-12-13 14:07:48.688359: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:39] Overriding allow_growth setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.
2020-12-13 14:07:48.688404: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1325] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 14221 MB memory) -> physical GPU (device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5)
WARNING:tensorflow:From test_bert.py:176: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

self.num_steps.shape[-1]: Tensor("strided_slice_1:0", shape=(), dtype=int32)
WARNING:tensorflow:From /content/drive/My Drive/Deep-Learning-With-Python/chapter8/CCF_ner/tf_utils/bert_modeling.py:175: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.

WARNING:tensorflow:From /content/drive/My Drive/Deep-Learning-With-Python/chapter8/CCF_ner/tf_utils/bert_modeling.py:416: The name tf.get_variable is deprecated. Please use tf.compat.v1.get_variable instead.

WARNING:tensorflow:From /content/drive/My Drive/Deep-Learning-With-Python/chapter8/CCF_ner/tf_utils/bert_modeling.py:497: The name tf.assert_less_equal is deprecated. Please use tf.compat.v1.assert_less_equal instead.

WARNING:tensorflow:
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

WARNING:tensorflow:From /content/drive/My Drive/Deep-Learning-With-Python/chapter8/CCF_ner/tf_utils/bert_modeling.py:364: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
WARNING:tensorflow:From /content/drive/My Drive/Deep-Learning-With-Python/chapter8/CCF_ner/tf_utils/bert_modeling.py:874: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.Dense instead.
WARNING:tensorflow:From /tensorflow-1.15.2/python3.6/tensorflow_core/python/layers/core.py:187: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.
Instructions for updating:
Please use `layer.__call__` method instead.
WARNING:tensorflow:From /content/drive/My Drive/Deep-Learning-With-Python/chapter8/CCF_ner/tf_utils/bert_modeling.py:282: The name tf.erf is deprecated. Please use tf.math.erf instead.

WARNING:tensorflow:From test_bert.py:209: bidirectional_dynamic_rnn (from tensorflow.python.ops.rnn) is deprecated and will be removed in a future version.
Instructions for updating:
Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API
WARNING:tensorflow:From /tensorflow-1.15.2/python3.6/tensorflow_core/python/ops/rnn.py:464: dynamic_rnn (from tensorflow.python.ops.rnn) is deprecated and will be removed in a future version.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
WARNING:tensorflow:Entity <bound method CoupledInputForgetGateLSTMCell.call of <tf_utils.rnncell.CoupledInputForgetGateLSTMCell object at 0x7f297e2d2eb8>> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: module 'gast' has no attribute 'Num'
WARNING:tensorflow:From /tensorflow-1.15.2/python3.6/tensorflow_core/python/ops/rnn.py:244: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
WARNING:tensorflow:Entity <bound method CoupledInputForgetGateLSTMCell.call of <tf_utils.rnncell.CoupledInputForgetGateLSTMCell object at 0x7f297e2d2fd0>> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: module 'gast' has no attribute 'Num'
WARNING:tensorflow:From test_bert.py:225: The name tf.nn.xw_plus_b is deprecated. Please use tf.compat.v1.nn.xw_plus_b instead.

output_layer.shape: (?, ?, 11)
log_probs.shape: (?, ?, 11)
self.per_example_loss.shape: (?, ?)
self.loss.shape: ()
WARNING:tensorflow:From test_bert.py:255: The name tf.trainable_variables is deprecated. Please use tf.compat.v1.trainable_variables instead.

WARNING:tensorflow:From test_bert.py:259: The name tf.train.init_from_checkpoint is deprecated. Please use tf.compat.v1.train.init_from_checkpoint instead.

WARNING:tensorflow:From test_bert.py:260: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead.

  name = bert/embeddings/word_embeddings:0, shape = (21128, 768), *INIT_FROM_CKPT*
  name = bert/embeddings/token_type_embeddings:0, shape = (2, 768), *INIT_FROM_CKPT*
  name = bert/embeddings/position_embeddings:0, shape = (512, 768), *INIT_FROM_CKPT*
  name = bert/embeddings/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/embeddings/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_0/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_0/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_0/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_0/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_0/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_0/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_0/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_0/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_0/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_0/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_0/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
  name = bert/encoder/layer_0/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_0/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_0/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_0/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_0/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_1/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_1/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_1/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_1/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_1/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_1/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_1/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_1/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_1/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_1/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_1/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
  name = bert/encoder/layer_1/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_1/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_1/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_1/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_1/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_2/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_2/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_2/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_2/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_2/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_2/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_2/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_2/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_2/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_2/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_2/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
  name = bert/encoder/layer_2/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_2/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_2/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_2/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_2/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_3/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_3/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_3/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_3/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_3/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_3/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_3/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_3/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_3/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_3/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_3/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
  name = bert/encoder/layer_3/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_3/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_3/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_3/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_3/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_4/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_4/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_4/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_4/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_4/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_4/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_4/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_4/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_4/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_4/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_4/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
  name = bert/encoder/layer_4/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_4/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_4/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_4/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_4/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_5/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_5/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_5/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_5/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_5/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_5/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_5/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_5/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_5/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_5/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_5/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
  name = bert/encoder/layer_5/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_5/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_5/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_5/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_5/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_6/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_6/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_6/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_6/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_6/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_6/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_6/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_6/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_6/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_6/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_6/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
  name = bert/encoder/layer_6/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_6/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_6/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_6/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_6/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_7/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_7/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_7/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_7/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_7/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_7/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_7/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_7/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_7/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_7/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_7/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
  name = bert/encoder/layer_7/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_7/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_7/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_7/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_7/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_8/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_8/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_8/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_8/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_8/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_8/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_8/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_8/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_8/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_8/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_8/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
  name = bert/encoder/layer_8/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_8/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_8/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_8/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_8/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_9/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_9/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_9/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_9/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_9/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_9/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_9/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_9/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_9/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_9/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_9/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
  name = bert/encoder/layer_9/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_9/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_9/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_9/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_9/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_10/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_10/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_10/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_10/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_10/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_10/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_10/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_10/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_10/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_10/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_10/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
  name = bert/encoder/layer_10/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_10/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_10/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_10/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_10/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_11/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_11/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_11/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_11/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_11/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_11/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_11/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_11/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_11/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_11/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_11/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
  name = bert/encoder/layer_11/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_11/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
  name = bert/encoder/layer_11/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_11/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_11/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
  name = bert/pooler/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
  name = bert/pooler/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
  name = bidirectional_rnn/fw/coupled_input_forget_gate_lstm_cell/_w_xi:0, shape = (768, 256)
  name = bidirectional_rnn/fw/coupled_input_forget_gate_lstm_cell/_w_hi:0, shape = (256, 256)
  name = bidirectional_rnn/fw/coupled_input_forget_gate_lstm_cell/_w_ci:0, shape = (256, 256)
  name = bidirectional_rnn/fw/coupled_input_forget_gate_lstm_cell/_w_xo:0, shape = (768, 256)
  name = bidirectional_rnn/fw/coupled_input_forget_gate_lstm_cell/_w_ho:0, shape = (256, 256)
  name = bidirectional_rnn/fw/coupled_input_forget_gate_lstm_cell/_w_co:0, shape = (256, 256)
  name = bidirectional_rnn/fw/coupled_input_forget_gate_lstm_cell/_w_xc:0, shape = (768, 256)
  name = bidirectional_rnn/fw/coupled_input_forget_gate_lstm_cell/_w_hc:0, shape = (256, 256)
  name = bidirectional_rnn/fw/coupled_input_forget_gate_lstm_cell/_b_i:0, shape = (256,)
  name = bidirectional_rnn/fw/coupled_input_forget_gate_lstm_cell/_b_c:0, shape = (256,)
  name = bidirectional_rnn/fw/coupled_input_forget_gate_lstm_cell/_b_o:0, shape = (256,)
  name = bidirectional_rnn/bw/coupled_input_forget_gate_lstm_cell/_w_xi:0, shape = (768, 256)
  name = bidirectional_rnn/bw/coupled_input_forget_gate_lstm_cell/_w_hi:0, shape = (256, 256)
  name = bidirectional_rnn/bw/coupled_input_forget_gate_lstm_cell/_w_ci:0, shape = (256, 256)
  name = bidirectional_rnn/bw/coupled_input_forget_gate_lstm_cell/_w_xo:0, shape = (768, 256)
  name = bidirectional_rnn/bw/coupled_input_forget_gate_lstm_cell/_w_ho:0, shape = (256, 256)
  name = bidirectional_rnn/bw/coupled_input_forget_gate_lstm_cell/_w_co:0, shape = (256, 256)
  name = bidirectional_rnn/bw/coupled_input_forget_gate_lstm_cell/_w_xc:0, shape = (768, 256)
  name = bidirectional_rnn/bw/coupled_input_forget_gate_lstm_cell/_w_hc:0, shape = (256, 256)
  name = bidirectional_rnn/bw/coupled_input_forget_gate_lstm_cell/_b_i:0, shape = (256,)
  name = bidirectional_rnn/bw/coupled_input_forget_gate_lstm_cell/_b_c:0, shape = (256,)
  name = bidirectional_rnn/bw/coupled_input_forget_gate_lstm_cell/_b_o:0, shape = (256,)
  name = HW:0, shape = (512, 256)
  name = Hb:0, shape = (256,)
  name = LW:0, shape = (256, 11)
  name = Lb:0, shape = (11,)
init bert from checkpoint: /content/drive/MyDrive/Deep-Learning-With-Python/chapter8/CCF_ner/bert_pretrained/bert_model.ckpt
WARNING:tensorflow:From test_bert.py:392: The name tf.train.exponential_decay is deprecated. Please use tf.compat.v1.train.exponential_decay instead.

bert train variable num: 199
normal train variable num: 26
word2vec trainable!!

說明：

我們可以直接呼叫官方的tensorflow的bert模型來使用bert，接下來，我們使用output_layer=model.get_sequence_output()來獲得最後一層的特徵，然後再新增bilstm層，

對於bilstm的前向和反向的輸出進行拼接後，經過一個project_layer()函式計算logits，最後再經過一個損失層計算損失和其它的一些預測的值等。同時我們要將預訓練bert模型的引數匯入到bert中。

這裡面我們可以通過這種方式計算每個序列的長度：

# Derive each sequence's true length from the token ids themselves.
# sign(abs(id)) is 1 for every non-zero id and 0 for id 0
# (assumes id 0 is the padding id — TODO confirm against the data pipeline).
used = tf.sign(tf.abs(self.input_x_word))
# Sum the 0/1 mask along axis 1 to count the non-zero ids in each row.
# NOTE(review): `reduction_indices` is the legacy alias of `axis` in tf.reduce_sum.
length = tf.reduce_sum(used, reduction_indices=1)
self.lengths = tf.cast(length, tf.int32)

當然，在喂入資料的時候，我們也已經傳入了長度了，可以酌情使用。

當bert後面接上bilstm時，一般而言bert微調的學習率和bilstm的學習率要設定成不同的，比如以下程式碼：

# 超引數設定
            # Build the training op with two separate optimizers: one for the
            # BERT variables and one for the layers stacked on top of BERT, so
            # that each group can use its own learning rate.

            # Non-trainable step counter; drives the decay schedule below and is
            # passed to minimize() for the downstream optimizer.
            global_step = tf.Variable(0, name='step', trainable=False)
            # Exponentially decayed learning rate for the downstream layers;
            # staircase=True makes the decay happen at discrete intervals.
            learning_rate = tf.train.exponential_decay(config.learning_rate, global_step, config.decay_step,
                                                       config.decay_rate, staircase=True)

            normal_optimizer = tf.train.AdamOptimizer(learning_rate)  # optimizer (learning rate) for the downstream layers

            all_variables = graph.get_collection('trainable_variables')
            # Partition trainable variables by whether 'bert' occurs in the name.
            word2vec_var_list = [x for x in all_variables if 'bert' in x.name]  # BERT parameters
            normal_var_list = [x for x in all_variables if 'bert' not in x.name]  # downstream-layer parameters
            print('bert train variable num: {}'.format(len(word2vec_var_list)))
            print('normal train variable num: {}'.format(len(normal_var_list)))
            # Only the downstream variables are updated by this op.
            normal_op = normal_optimizer.minimize(model.loss, global_step=global_step, var_list=normal_var_list)
            # Total number of training steps: (records / batch size) * epochs.
            num_batch = int(train_iter.num_records / config.batch_size * config.train_epoch)
            # Default step variable; rebound below when BERT variables exist.
            # NOTE(review): requests the same name 'step' as global_step — confirm this is intended.
            embed_step = tf.Variable(0, name='step', trainable=False)
            if word2vec_var_list:  # fine-tune BERT
                print('word2vec trainable!!')
                # create_optimizer is defined elsewhere; presumably adapted from
                # BERT's optimization.create_optimizer (it takes variable_list and
                # returns three values here) — verify against its definition.
                # Warmup is set to 5% of the total step count.
                word2vec_op, embed_learning_rate, embed_step = create_optimizer(
                    model.loss, config.embed_learning_rate, num_train_steps=num_batch,
                    num_warmup_steps=int(num_batch * 0.05) , use_tpu=False ,  variable_list=word2vec_var_list
                )

                train_op = tf.group(normal_op, word2vec_op)  # run the BERT and downstream updates together
            else:
                # No BERT variables are trainable: train the downstream layers only.
                train_op = normal_op

一般bert+bilstm之後還需要接一個crf（條件隨機場），我們下節繼續。

命名實體識別之bert+bilstm（基於tensorflow）

接下來我們繼續對官方基於bert的模型進行擴充套件，之前的可參考：基於bert命名實體識別（一）資料處理

命名實體識別之動態融合不同bert層的特徵（基於tensorflow）

num_labels = self.config.relation_num bert_config_file = self.config.bert_config_file bert_config = BertConfig.from_json_file(bert_config_file)

命名實體識別之建立訓練資料

1、讀取txt中的檔案得到資料 def load_data(data_file): \"\"\" 讀取BIO的資料 :param file: :return:

Cifar10資料集的下載和匯入，windows和linux（基於tensorflow）

技術標籤：tensorflow 許多博主都提到了如何下載，在哪下載，但是到了最重要的一步：如何匯入資料集，都是草草帶過，這對於新手來說很不友好，因此寫下這篇文章。

基於bert命名實體識別（一）資料處理

要使用官方的tensorflow版本的bert微調進行自己的命名實體識別，需要處理資料成bert相應的格式，主要是在run_classifier.py中，比如說：

python實現命名實體識別指標（實體級別）

pre = \"0 0 B_SONG I_SONG I_SONG 0 B_SONG I_SONG I_SONG 0 0 B_SINGER I_SINGER I_SINGER 0 O O O B_ALBUM I_ALBUM I_ALBUM O O B_TAG I_TAG I_TAG O\"

跟我讀論文丨ACL2021 NER BERT化隱馬爾可夫模型用於多源弱監督命名實體識別

摘要：本文是對ACL2021 NER BERT化隱馬爾可夫模型用於多源弱監督命名實體識別這一論文工作進行初步解讀。

論文解讀：ACL2021 NER | 基於模板的BART命名實體識別

摘要：本文是對ACL2021 NER 基於模板的BART命名實體識別這一論文工作進行初步解讀。

mysql事件之修改事件（ALTER EVENT）、禁用事件（DISABLE）、啟用事件（ENABLE）、事件重新命名及資料庫事件遷移操作詳解

本文例項講述了mysql事件之修改事件（ALTER EVENT）、禁用事件（DISABLE）、啟用事件（ENABLE）、事件重新命名及資料庫事件遷移操作。分享給大家供大家參考，具體如下：

【Python實戰專案】針對醫療資料進行命名實體識別

一．什麼是命名實體識別二．基於NLTK的命名實體識別三．基於Stanford的NER 四．【實戰案例】醫學糖尿病資料命名實體識別

命名實體識別競賽中的經驗過擬合

實體競賽最重要的是分數，而分數則對應著資料打標者當時對資料進行標註的心態。所以，有時候資料預處理的方法並不是理論上完美，實際就無敵的。因為打標者的心態不可知。

TENER: Adapting Transformer Encoder for Named Entity Recognition TENER命名實體識別思維導圖筆記

TENER 命名實體識別論文 TENER: Adapting Transformer Encoder for Named Entity RecognitionTENER:適用於命名實體識別的改進Transformer作者: Hang Yan, Bocao Deng等單位:復旦大學發表會議及時間: ACL2

基於RYU應用開發之負載均衡（原始碼開放）

>>> 編者按：本文介紹的是如何在RYU上通過使用selectgroup來實現multipath，從而實現流量的排程，完成簡單的負載均衡Demo。在OpenFlow13中有grouptable,可用於實現組播和冗餘容災等功能。實驗

命名實體識別資料預處理

背景：從提供的金融文字中識別出未出現的未知金融實體一、簡單的熟悉資料

Android之Activity啟動流程詳解（基於api28）

前言 Activity作為Android四大元件之一，他的啟動絕對沒有那麼簡單。這裡涉及到了系統服務程序，啟動過程細節很多，這裡我只展示主體流程。activity的啟動流程隨著版本的更替，程式碼細節一直在進行更改，每次都會有

JavaScript資料結構之佇列結構（基於陣列實現）

技術標籤：資料結構與演算法javascript佇列js資料結構文章目錄一、佇列是什麼？二、佇列常見的應用三、佇列的實現三、佇列的常見操作四、使用步驟1.建立佇列Queue類：2.使用佇列：3.控制檯

【NLP學習其一】什麼是命名實體識別NER?

命名實體識別概念命名實體識別(Named Entity Recognition,簡稱NER) , 是指識別文字中具有特定意義的詞（實體），主要包括人名、地名、機構名、專有名詞等等,並把我們需要識別的詞在文字序列中標註出來。

「免費開源」基於Vue和Quasar的前端SPA專案crudapi後臺管理系統實戰之聯合索引（十一）

基於Vue和Quasar的前端SPA專案實戰之聯合索引（十一）回顧通過之前文章基於Vue和Quasar的前端SPA專案實戰之動態表單（五）的介紹，關於表單元資料配置相關內容已經實現了，本文主要介紹聯合索引功能的實現。

論文解讀丨圖神經網路應用於半結構化文件的命名實體識別和關係提取

摘要：隨著用於傳遞和記錄業務資訊的管理文件的廣泛使用，能夠魯棒且高效地從這些文件中自動提取和理解內容的方法成為一個迫切的需求。本次解讀的文章提出利用圖神經網路來解決半結構化文件中的實體識別（NER）和關

mysql事件之修改事件（ALTER EVENT）&禁用事件（DISABLE）&啟用事件（ENABLE）&事件重新命名&資料庫事件遷移

我們要知道，MySQL允許我們更改現有事件的各種屬性。如果我們要更改現有事件，可以使用ALTER EVENT語句，如下所示：

命名實體識別之bert+bilstm（基於tensorflow）

相關推薦