
TensorFlow multi-GPU training

When training with multiple GPUs, the global input batch has size batch_size * num_gpus (each GPU processes one batch_size slice), which can greatly reduce training time.

In TensorFlow, ops can be pinned to a specific GPU with tf.device(). For example, to use GPU 0:

gpu_ind = 0
with tf.device("/gpu:{}".format(gpu_ind)):
    ...  # ops created inside this block are placed on /gpu:0
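If a pinned device is unavailable, session creation fails unless soft placement is enabled. A minimal session configuration (standard TensorFlow 1.x API, not specific to OpenSeq2Seq):

import tensorflow as tf

# allow_soft_placement falls back to another device when the requested GPU
# is missing; log_device_placement prints where each op actually runs.
config = tf.ConfigProto(allow_soft_placement=True,
                        log_device_placement=True)
sess = tf.Session(config=config)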

The multi-GPU model definition lives in OpenSeq2Seq/model/model_base.py.

First, the input placeholders are defined and then split into per-GPU feeds:

# placeholders for feeding data
self.x = tf.placeholder(tf.int32, [self.global_batch_size, None])
self.x_length = tf.placeholder(tf.int32, [self.global_batch_size])
self.y = tf.placeholder(tf.int32, [self.global_batch_size, None])
self.y_length = tf.placeholder(tf.int32, [self.global_batch_size])

# below we follow data parallelism for multi-GPU training
# actual per GPU data feeds
xs = tf.split(value=self.x, num_or_size_splits=num_gpus, axis=0)
x_lengths = tf.split(value=self.x_length, num_or_size_splits=num_gpus, axis=0)
ys = tf.split(value=self.y, num_or_size_splits=num_gpus, axis=0)
y_lengths = tf.split(value=self.y_length, num_or_size_splits=num_gpus, axis=0)
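Each per-GPU slice holds global_batch_size / num_gpus examples, so the global batch size must be divisible by num_gpus. A minimal standalone sketch (the sizes are illustrative) to confirm the per-tower shapes:

import tensorflow as tf

num_gpus = 4
global_batch_size = 32  # must be divisible by num_gpus

x = tf.placeholder(tf.int32, [global_batch_size, None])
xs = tf.split(value=x, num_or_size_splits=num_gpus, axis=0)

# each of the 4 towers receives a [8, None] slice of the global batch
print([t.get_shape().as_list() for t in xs])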

For each of the num_gpus GPUs, tf.device() pins that tower to its card, and the forward pass, loss, and update strategy are defined per tower as follows:

eval_ops = []
losses = []
for gpu_ind in range(0, num_gpus):
  with tf.device("/gpu:{}".format(gpu_ind)), tf.variable_scope(
    name_or_scope=tf.get_variable_scope(),
    # re-using variables across GPUs.
    reuse=force_var_reuse or (gpu_ind > 0)):
    deco_print("Building graph on GPU:{}".format(gpu_ind))
    if self.mode == "train" or self.mode == "eval":
      sample_ops, loss_i = self._build_forward_pass_graph(source_sequence=xs[gpu_ind],
                                                          src_length=x_lengths[gpu_ind],
                                                          target_sequence=ys[gpu_ind],
                                                          tgt_length=y_lengths[gpu_ind],
                                                          gpu_id=gpu_ind)
      losses.append(loss_i)
      if self.mode == "eval":
        eval_ops.append(sample_ops)

    elif self.mode == "infer":
      self._build_forward_pass_graph(source_sequence=xs[gpu_ind],
                                     src_length=x_lengths[gpu_ind],
                                     gpu_id=gpu_ind)
    else:
      raise ValueError("Unknown mode")
# end of for gpu_ind loop

if self.mode != "infer":
  self._eval_ops = eval_ops
  self._eval_y = ys
  self.loss = tf.reduce_mean(losses)
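The same pattern in a minimal, self-contained form (a toy linear model for illustration, not OpenSeq2Seq code): variables are created on the first tower and re-used on the others, and the per-tower losses are averaged into a single training loss.

import tensorflow as tf

num_gpus = 2
x = tf.placeholder(tf.float32, [8, 4])
y = tf.placeholder(tf.float32, [8, 1])
xs = tf.split(x, num_or_size_splits=num_gpus, axis=0)
ys = tf.split(y, num_or_size_splits=num_gpus, axis=0)

losses = []
for gpu_ind in range(num_gpus):
  with tf.device("/gpu:{}".format(gpu_ind)), \
       tf.variable_scope(tf.get_variable_scope(), reuse=(gpu_ind > 0)):
    w = tf.get_variable("w", [4, 1])  # created on tower 0, re-used on tower 1
    pred = tf.matmul(xs[gpu_ind], w)
    losses.append(tf.reduce_mean(tf.square(pred - ys[gpu_ind])))

loss = tf.reduce_mean(losses)  # average the per-tower losses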


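When use_decay is enabled, the learning-rate schedule holds the initial rate constant until step begin_decay_at, then applies exponential decay, and never lets the rate fall below min_learning_rate: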
def exp_decay(learning_rate, var_global_step):
  new_lr = tf.train.exponential_decay(learning_rate=learning_rate,
                                      global_step=var_global_step,
                                      decay_steps=self.model_params['decay_steps'],
                                      decay_rate=self.model_params['decay_rate'],
                                      staircase=self.model_params['use_staircase_decay'])
  boundaries = [self.model_params['begin_decay_at']]
  values = [learning_rate, new_lr]
  min_rate = self.model_params['min_learning_rate']
  final_lr = tf.maximum(tf.train.piecewise_constant(
    x=tf.to_int32(var_global_step),
    boundaries=boundaries,
    values=values), min_rate)
  self._lr = final_lr
  return final_lr

lr_decay_fn = exp_decay if self.model_params.get('use_decay', False) else None
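The same schedule as a standalone sketch (the hyper-parameter values below are illustrative, not taken from an OpenSeq2Seq config):

import tensorflow as tf

global_step = tf.placeholder(tf.int32, [])
learning_rate = 0.1

new_lr = tf.train.exponential_decay(learning_rate=learning_rate,
                                    global_step=global_step,
                                    decay_steps=50,
                                    decay_rate=0.5,
                                    staircase=True)
final_lr = tf.maximum(
    tf.train.piecewise_constant(x=global_step,
                                boundaries=[100],       # begin_decay_at
                                values=[learning_rate, new_lr]),
    0.001)                                              # min_learning_rate

with tf.Session() as sess:
  for step in (0, 99, 100, 150, 1000):
    # 0.1 up to step 100, decayed afterwards, floored at 0.001
    print(step, sess.run(final_lr, feed_dict={global_step: step}))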

if self.model_params['optimizer'].lower() == 'momentum':
  optimizer = tf.train.MomentumOptimizer(
      learning_rate=self.model_params['learning_rate'],
      momentum=self.model_params.get('opt_momentum', 0.9))
else:
  optimizer = self.model_params['optimizer']
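Note that tf.contrib.layers.optimize_loss (used below) accepts the optimizer argument either as an Optimizer instance, as built in the 'momentum' branch, or as the string name of a built-in optimizer such as "SGD" or "Adam", which is why the else branch can pass the configured value straight through.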


if self._mode == "train":
  self._lr = tf.Variable(initial_value=self.model_params['learning_rate'], trainable=False)
  self.train_op = tf.contrib.layers.optimize_loss(
    loss=self.loss,
    global_step=tf.contrib.framework.get_global_step(),
    learning_rate=self.model_params['learning_rate'],
    optimizer=optimizer,
    gradient_noise_scale=None,
    gradient_multipliers=None,
    clip_gradients=self.model_params.get('max_grad_norm', None),
    learning_rate_decay_fn=lr_decay_fn,
    update_ops=None,
    variables=None,
    name="Loss_Optimization",
    summaries=["learning_rate", "loss", "gradients", "gradient_norm"],
    colocate_gradients_with_ops=True,
    increment_global_step=True
  )
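  # colocate_gradients_with_ops=True places each gradient op on the same device
  # as the forward op it differentiates, so the backward pass stays distributed
  # across the GPU towers instead of accumulating on one device.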

  print('%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%')
  print('%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%')
  deco_print("Trainable variables:")
  total_params = 0
  for var in tf.trainable_variables():
    var_params = 1
    for dim in var.get_shape():
      var_params *= dim.value
    total_params += var_params
    print('Name: {}    |    Shape: {}    |    Dtype: {}'.format(var.name, var.get_shape(), var.dtype))
  deco_print('Total trainable parameters: %d' % total_params)
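Note that var.get_shape() returns the static shape recorded at graph-construction time, which is why the parameter count can be computed without running a session; tf.shape(var), by contrast, returns a tensor that would have to be evaluated.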