TensorFlow multi-GPU training
When training on multiple GPUs, the model consumes an input of size batch_size * num_gpus per step, which can greatly reduce training time.
In TensorFlow, a specific GPU is selected with tf.device(). For example, to use GPU 0:
gpu_ind = 0
with tf.device("/gpu:{}".format(gpu_ind)):
    # ops built inside this block are placed on /gpu:0
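For reference, here is a minimal end-to-end sketch of the same idea (assuming TensorFlow 1.x, which this code targets, and a hypothetical toy graph); allow_soft_placement lets ops without a GPU kernel fall back to the CPU:

import tensorflow as tf

gpu_ind = 0
with tf.device("/gpu:{}".format(gpu_ind)):
    a = tf.constant([1.0, 2.0, 3.0])
    b = tf.constant([4.0, 5.0, 6.0])
    c = a + b  # this op is pinned to /gpu:0

# fall back to CPU if a kernel is missing, and log where each op was placed
config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
with tf.Session(config=config) as sess:
    print(sess.run(c))  # [5. 7. 9.]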
The multi-GPU model definition lives in OpenSeq2Seq/model/model_base.py.
First, the input placeholders are defined and then split into per-GPU feeds:
# placeholders for feeding data
self.x = tf.placeholder(tf.int32, [self.global_batch_size, None])
self.x_length = tf.placeholder(tf.int32, [self.global_batch_size])
self.y = tf.placeholder(tf.int32, [self.global_batch_size, None])
self.y_length = tf.placeholder(tf.int32, [self.global_batch_size])
# below we follow data parallelism for multi-GPU training
# actual per GPU data feeds
xs = tf.split(value=self.x, num_or_size_splits=num_gpus, axis=0)
x_lengths = tf.split(value=self.x_length, num_or_size_splits=num_gpus, axis=0)
ys = tf.split(value=self.y, num_or_size_splits=num_gpus, axis=0)
y_lengths = tf.split(value=self.y_length, num_or_size_splits=num_gpus, axis=0)
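To make the split concrete, a toy sketch with hypothetical sizes: a global batch of 8 is divided evenly along axis 0, so each of 2 GPUs receives a feed of 4 examples. Note that global_batch_size must be divisible by num_gpus, otherwise tf.split raises an error.

import tensorflow as tf

num_gpus = 2
x = tf.placeholder(tf.int32, [8, None])               # global_batch_size = 8
xs = tf.split(value=x, num_or_size_splits=num_gpus, axis=0)
print([t.get_shape().as_list() for t in xs])          # [[4, None], [4, None]]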
For each of the num_gpus GPUs, tf.device() selects the device, and the model's forward pass, loss, and update strategy are built there, sharing variables across devices. The code is as follows:
eval_ops = []
losses = []
for gpu_ind in range(0, num_gpus):
    with tf.device("/gpu:{}".format(gpu_ind)), tf.variable_scope(
            name_or_scope=tf.get_variable_scope(),
            # re-using variables across GPUs.
            reuse=force_var_reuse or (gpu_ind > 0)):
        deco_print("Building graph on GPU:{}".format(gpu_ind))
        if self.mode == "train" or self.mode == "eval":
            sample_ops, loss_i = self._build_forward_pass_graph(
                source_sequence=xs[gpu_ind],
                src_length=x_lengths[gpu_ind],
                target_sequence=ys[gpu_ind],
                tgt_length=y_lengths[gpu_ind],
                gpu_id=gpu_ind)
            losses.append(loss_i)
            if self.mode == "eval":
                eval_ops.append(sample_ops)
        elif self.mode == "infer":
            self._build_forward_pass_graph(source_sequence=xs[gpu_ind],
                                           src_length=x_lengths[gpu_ind],
                                           gpu_id=gpu_ind)
        else:
            raise ValueError("Unknown mode")
# end of for gpu_ind loop

if self.mode != "infer":
    self._eval_ops = eval_ops
    self._eval_y = ys
    # average the per-GPU losses into a single scalar loss
    self.loss = tf.reduce_mean(losses)
def exp_decay(learning_rate, var_global_step):
    # exponentially decay the learning rate...
    new_lr = tf.train.exponential_decay(learning_rate=learning_rate,
                                        global_step=var_global_step,
                                        decay_steps=self.model_params['decay_steps'],
                                        decay_rate=self.model_params['decay_rate'],
                                        staircase=self.model_params['use_staircase_decay'])
    # ...but only after 'begin_decay_at' steps, and never below 'min_learning_rate'
    boundaries = [self.model_params['begin_decay_at']]
    values = [learning_rate, new_lr]
    min_rate = self.model_params['min_learning_rate']
    final_lr = tf.maximum(tf.train.piecewise_constant(x=tf.to_int32(var_global_step),
                                                      boundaries=boundaries,
                                                      values=values),
                          min_rate)
    self._lr = final_lr
    return final_lr
lr_decay_fn = exp_decay if self.model_params.get('use_decay', False) else None

if self.model_params['optimizer'].lower() == 'momentum':
    optimizer = tf.train.MomentumOptimizer(
        learning_rate=self.model_params['learning_rate'],
        momentum=self.model_params.get('opt_momentum', 0.9))
else:
    # a string such as 'Adam' or 'SGD' is passed straight through to optimize_loss
    optimizer = self.model_params['optimizer']
if self._mode == "train":
    self._lr = tf.Variable(initial_value=self.model_params['learning_rate'], trainable=False)
    self.train_op = tf.contrib.layers.optimize_loss(
        loss=self.loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=self.model_params['learning_rate'],
        optimizer=optimizer,
        gradient_noise_scale=None,
        gradient_multipliers=None,
        clip_gradients=None if 'max_grad_norm' not in self.model_params else self.model_params['max_grad_norm'],
        learning_rate_decay_fn=lr_decay_fn,
        update_ops=None,
        variables=None,
        name="Loss_Optimization",
        summaries=["learning_rate", "loss", "gradients", "gradient_norm"],
        colocate_gradients_with_ops=True,
        increment_global_step=True)
print('%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%')
print('%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%')
deco_print("Trainable variables:")
total_params = 0
for var in tf.trainable_variables():
    # multiply the size of every dimension to get the variable's parameter count
    var_params = 1
    for dim in var.get_shape():
        var_params *= dim.value
    total_params += var_params
    print('Name: {} | Shape: {} | Dtype: {}'.format(var.name, var.get_shape(), var.dtype))
deco_print('Total trainable parameters: %d' % total_params)
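As a sanity check on the decay schedule defined in exp_decay above, here is a small standalone sketch (with hypothetical hyper-parameters standing in for model_params) that evaluates the combined piecewise_constant / exponential_decay learning rate at a few global steps:

import tensorflow as tf

# hypothetical values, not taken from any OpenSeq2Seq config
learning_rate = 0.1       # initial learning rate
begin_decay_at = 2000     # keep the rate constant until this step
min_learning_rate = 0.001

global_step = tf.placeholder(tf.int32, [])
new_lr = tf.train.exponential_decay(learning_rate, global_step,
                                    decay_steps=1000, decay_rate=0.5,
                                    staircase=True)
# constant until begin_decay_at, then exponential decay, floored at min_learning_rate
final_lr = tf.maximum(tf.train.piecewise_constant(x=global_step,
                                                  boundaries=[begin_decay_at],
                                                  values=[learning_rate, new_lr]),
                      min_learning_rate)

with tf.Session() as sess:
    for step in [0, 2000, 2001, 5000, 20000]:
        print(step, sess.run(final_lr, feed_dict={global_step: step}))
# prints 0.1, 0.1, 0.025, 0.003125, 0.001 (the floor)

Note that exp_decay decays from the raw global step rather than from (global_step - begin_decay_at), so the rate drops from 0.1 to 0.025 immediately after the boundary.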