
TensorFlow Notes (1)


Tags: tensorflow notes


SAME and VALID in convolutions

1. When performing a convolution, the padding argument can be set to VALID or SAME. The examples below illustrate the difference:

  • SAME: the convolution output keeps the same height and width as the input tensor. To achieve this, zeros are padded around the original tensor until the output tensor has the same spatial size as the input tensor.
import tensorflow as tf

input_layer = tf.reshape(tf.linspace(start=1.0, stop=25.0, num=25), [1, 5, 5, 1])
weights = tf.ones(shape=[5, 5, 1, 1])

convolution = tf.nn.conv2d(input_layer, weights, strides=[1, 1, 1, 1], padding='SAME')

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    print(sess.run(convolution))
"""
[[[[  63.]  [  90.]  [ 120.]  [ 102.]  [  81.]]
  [[ 114.]  [ 160.]  [ 210.]  [ 176.]  [ 138.]]
  [[ 180.]  [ 250.]  [ 325.]  [ 270.]  [ 210.]]
  [[ 174.]  [ 240.]  [ 310.]  [ 256.]  [ 198.]]
  [[ 153.]  [ 210.]  [ 270.]  [ 222.]  [ 171.]]]]
"""
  • VALID: no zero padding. Only positions where the filter lies entirely inside the image are computed; anything that would go out of bounds is skipped, so the output tensor is necessarily smaller than the input tensor.
input_layer = tf.reshape(tf.linspace(start=1.0, stop=25.0, num=25), [1, 5, 5, 1])
weights = tf.ones(shape=[5, 5, 1, 1])

convolution = tf.nn.conv2d(input_layer, weights, strides=[1, 1, 1, 1], padding='VALID')

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    print(sess.run(convolution))
"""
[[[[ 325.]]]]
"""
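For quick sanity checks, the output height/width can be computed directly from the padding mode. The helper below is a small sketch added for illustration (it is not part of the original notes); it uses the standard formulas TensorFlow applies for SAME and VALID padding:

import math

def conv_output_size(input_size, filter_size, stride, padding):
    """Expected spatial size of one dimension of a conv2d output."""
    if padding == 'SAME':
        # SAME pads with zeros, so the size depends only on input size and stride
        return math.ceil(input_size / stride)
    # VALID keeps only positions where the filter fits entirely inside the input
    return math.ceil((input_size - filter_size + 1) / stride)

print(conv_output_size(5, 5, 1, 'SAME'))   # 5 -> matches the 5x5 output above
print(conv_output_size(5, 5, 1, 'VALID'))  # 1 -> matches the 1x1 output above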

tf.app.flags in TensorFlow

1. tf.app.flags is used to receive arguments passed on the command line, similar to parsing argv.
2. Arguments can be passed like this: python test.py --learning_rate 0.1 --epoch_num 5 --mode test

import tensorflow as tf

flags = tf.app.flags
flags.DEFINE_float("learning_rate", 0.01, "learning_rate_description")
flags.DEFINE_integer("epoch_num", 10, "Number of epochs to run trainer")
flags.DEFINE_string("mode", "train", "Option mode: train, train_from_scratch, inference")

# Provides the global object that can be used to access flags.
FLAGS = flags.FLAGS


def main(_):
    print(FLAGS.learning_rate)
    print(FLAGS.epoch_num)
    print(FLAGS.mode)


if __name__ == '__main__':
    tf.app.run()

"""
0.01
10
train
"""

Shared variables in TensorFlow

Purpose:

When we need to share a large set of variables and also want to initialize all of them in one place, we need shared variables.

Functions:

TensorFlow has two functions for creating variables: tf.get_variable() and tf.Variable(). Beginners usually reach for the latter. The difference is that tf.get_variable() performs a variable-existence check: it verifies whether an already-existing variable with the same name has been marked as shared, and if it has not, TensorFlow raises an error when it reaches the second variable with that name.

v1 = tf.get_variable('variable', shape=[1], initializer=tf.initializers.constant(0))
v2 = tf.get_variable('variable', shape=[1], initializer=tf.initializers.constant(1))

# ValueError: Variable variable already exists, disallowed.
# Did you mean to set reuse=True or reuse=tf.AUTO_REUSE in VarScope? Originally defined at:...
v1 = tf.Variable(0, name='variable')
# name = 'variable:0'
v2 = tf.Variable(1, name='variable')
# name = 'variable_1:0'

As you can see, with tf.Variable() TensorFlow does not raise an error; it simply gives the duplicate variable a different name. tf.get_variable() does not do this, which is why the first snippet fails.

Suppose we want to implement a simple convolution-plus-activation function. With tf.Variable(), the code looks like this:

def my_image_filter(input_images):
    conv1_weights = tf.Variable(tf.random_normal([5, 5, 32, 32]),
        name="conv1_weights")
    conv1_biases = tf.Variable(tf.zeros([32]), name="conv1_biases")
    conv1 = tf.nn.conv2d(input_images, conv1_weights,
        strides=[1, 1, 1, 1], padding='SAME')
    relu1 = tf.nn.relu(conv1 + conv1_biases)

    conv2_weights = tf.Variable(tf.random_normal([5, 5, 32, 32]),
        name="conv2_weights")
    conv2_biases = tf.Variable(tf.zeros([32]), name="conv2_biases")
    conv2 = tf.nn.conv2d(relu1, conv2_weights,
        strides=[1, 1, 1, 1], padding='SAME')
    return tf.nn.relu(conv2 + conv2_biases)


image1 = tf.ones(shape=[1, 255, 255, 32])
image2 = tf.zeros(shape=[1, 255, 255, 32])
result1 = my_image_filter(image1)
result2 = my_image_filter(image2)

We wanted to filter two images with the same filter, i.e. with the same parameters, but the code above creates two separate sets of variables instead. To deal with this, TensorFlow provides the variable scope mechanism.

The variable scope mechanism in TensorFlow consists of two main parts:

  • tf.get_variable(<name>, <shape>, <initializer>): creates or returns a variable with the given name
  • tf.variable_scope(<scope_name>): manages the namespace for variables created with tf.get_variable()

def conv_relu(input, kernel_shape, bias_shape):
    weights = tf.get_variable('weights', kernel_shape, initializer=tf.random_normal_initializer())
    biases = tf.get_variable('biases', bias_shape, initializer=tf.constant_initializer(0.0))
    conv = tf.nn.conv2d(input, weights, strides=[1, 1, 1, 1], padding='SAME')
    return tf.nn.relu(conv+biases)


def my_image_filter(input_images):
    with tf.variable_scope('conv1'):
        relu1 = conv_relu(input_images, [5, 5, 32, 32], [32])
    with tf.variable_scope('conv2'):
        return conv_relu(relu1, [5, 5, 32, 32], [32])


image1 = tf.ones(shape=[1, 255, 255, 32])
image2 = tf.zeros(shape=[1, 255, 255, 32])
result1 = my_image_filter(image1)
result2 = my_image_filter(image2)
# ValueError: Variable conv1/weights already exists, disallowed. Did you mean to set reuse=True or reuse=tf.AUTO_REUSE in VarScope? Originally defined at:

If we want to share the variables, we have to say so explicitly via the reuse_variables() method:

def conv_relu(input, kernel_shape, bias_shape):
    weights = tf.get_variable('weights', kernel_shape, initializer=tf.random_normal_initializer())
    biases = tf.get_variable('biases', bias_shape, initializer=tf.constant_initializer(0.0))
    conv = tf.nn.conv2d(input, weights, strides=[1, 1, 1, 1], padding='SAME')
    return tf.nn.relu(conv+biases)


def my_image_filter(input_images):
    with tf.variable_scope('conv1'):
        relu1 = conv_relu(input_images, [5, 5, 32, 32], [32])
    with tf.variable_scope('conv2'):
        return conv_relu(relu1, [5, 5, 32, 32], [32])


image1 = tf.ones(shape=[1, 255, 255, 32])
image2 = tf.zeros(shape=[1, 255, 255, 32])
with tf.variable_scope('image_filters') as scope:
    result1 = my_image_filter(image1)
    scope.reuse_variables()
    result2 = my_image_filter(image2)
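As the error message above suggests, another option (a small sketch added here, not in the original post) is to pass reuse=tf.AUTO_REUSE to tf.variable_scope, which creates the variables on the first call and silently reuses them afterwards:

image1 = tf.ones(shape=[1, 255, 255, 32])
image2 = tf.zeros(shape=[1, 255, 255, 32])
with tf.variable_scope('image_filters', reuse=tf.AUTO_REUSE):
    result1 = my_image_filter(image1)  # conv1/*, conv2/* variables are created here
with tf.variable_scope('image_filters', reuse=tf.AUTO_REUSE):
    result2 = my_image_filter(image2)  # the same variables are reused here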

Next, let's look at how variables are named in TensorFlow:

v1 = tf.Variable(initial_value=1.0, name='v')
print(v1.name)
v2 = tf.Variable(initial_value=1.0, name='v')
print(v2.name)
with tf.variable_scope('foo'):
    with tf.variable_scope('bar') as scope:
        v = tf.get_variable('v', [1])
        scope.reuse_variables()
        v2 = tf.get_variable('v', [1])
        v3 = tf.Variable(initial_value=1.0, name='v')
        v4 = tf.Variable(initial_value=1.0, name='v')
        print(v.name)
        print(v2.name)
        print(v3.name)
        print(v4.name)

'''
v:0
v_1:0
foo/bar/v:0
foo/bar/v:0
foo/bar/v_1:0
foo/bar/v_2:0
'''

Gradient functions in TensorFlow

The gradients function

def gradients(ys,
              xs,
              grad_ys=None,
              name="gradients",
              colocate_gradients_with_ops=False,
              gate_gradients=False,
              aggregation_method=None,
              stop_gradients=None):
  """Constructs symbolic derivatives of sum of `ys` w.r.t. x in `xs`.

TensorFlow normally first computes the gradients of the loss with respect to all of the model's parameter variables and then updates those variables. So if we only compute gradients with tf.gradients(), no variable is actually updated.
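As a minimal sketch of that two-step process (the variable w and the tiny loss below are only for illustration): tf.gradients() just builds the gradient tensors; to actually change a variable you still have to apply them, for example with an optimizer's apply_gradients():

w = tf.Variable(3.0)
loss = tf.square(w)                  # loss = w^2, so dloss/dw = 2w
grads = tf.gradients(loss, [w])      # builds the gradient tensor only
opt = tf.train.GradientDescentOptimizer(0.1)
train_op = opt.apply_gradients(list(zip(grads, [w])))  # this op performs the update

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(grads))  # [6.0] -- evaluating the gradient does NOT change w
    print(sess.run(w))      # 3.0
    sess.run(train_op)      # w <- w - 0.1 * 6.0
    print(sess.run(w))      # 2.4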

The stop_gradient function

This function masks out a tensor's influence when gradients are computed: its contribution is not taken into account during gradient computation, and the variables behind it are not updated either.

def stop_gradient(input, name=None):
  r"""Stops gradient computation.

  When executed in a graph, this op outputs its input tensor as-is.

  When building ops to compute gradients, this op prevents the contribution of
  its inputs to be taken into account.  Normally, the gradient generator adds ops
  to a graph to compute the derivatives of a specified 'loss' by recursively
  finding out inputs that contributed to its computation.  If you insert this op
  in the graph it inputs are masked from the gradient generator.  They are not
  taken into account for computing gradients.

  This is useful any time you want to compute a value with TensorFlow but need
  to pretend that the value was a constant. Some examples include:

  *  The *EM* algorithm where the *M-step* should not involve backpropagation
     through the output of the *E-step*.
  *  Contrastive divergence training of Boltzmann machines where, when
     differentiating the energy function, the training must not backpropagate
     through the graph that generated the samples from the model.
  *  Adversarial training, where no backprop should happen through the adversarial
     example generation process.

  Args:
    input: A `Tensor`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor`. Has the same type as `input`.
  """

Code analysis

import tensorflow as tf

a = tf.constant(0.)
b = 2 * a
g = tf.gradients(a + b, [a, b], stop_gradients=[a, b])

a1 = tf.stop_gradient(tf.constant(0.))
b1 = tf.stop_gradient(2 * a1)
g1 = tf.gradients(a1+b1, [a1, b1])

a2 = tf.constant(0.)
b2 = 2 * a2
g2 = tf.gradients(a2+b2, [a2, b2])
with tf.Session() as sess:
    print(sess.run(g))
    # output: [1.0, 1.0] -- a and b are listed in stop_gradients, so b's dependence on a is ignored
    print(sess.run(g1))
    # output: [1.0, 1.0] -- a1 and b1 are outputs of tf.stop_gradient, which has the same effect
    print(sess.run(g2))
    # output: [3.0, 1.0] -- nothing is stopped, so d(a2+b2)/da2 = 1 + 2 = 3 because b2 = 2*a2

Another example (note that the optimizer used here is plain vanilla gradient descent):

import tensorflow as tf
import numpy as np

x_data = np.linspace(start=1.0, stop=100.0, num=100)
y_data = np.linspace(start=1.0, stop=100.0, num=100)

x = tf.placeholder(tf.float32)
y = tf.placeholder(tf.float32)

w = tf.Variable(tf.random_normal([1]))
b = tf.Variable(initial_value=1.0)

# b = tf.stop_gradient(b)
predict_y = w * x + b
loss = tf.reduce_sum(tf.square(predict_y-y))
train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
# Equivalent two-step form:
# optimizer = tf.train.GradientDescentOptimizer(0.01)
# gradient = optimizer.compute_gradients(loss)
# train_op = optimizer.apply_gradients(gradient)

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    for _ in range(100):
        for index in range(len(x_data)):
            sess.run(train_op, feed_dict={x: x_data[index], y: y_data[index]})
            print(sess.run([w, b, loss], feed_dict={x: x_data[index], y: y_data[index]}))
"""
.....
[array([ nan], dtype=float32), inf, nan]
[array([ nan], dtype=float32), nan, nan]
[array([ nan], dtype=float32), nan, nan]
[array([ nan], dtype=float32), nan, nan]
[array([ nan], dtype=float32), nan, nan]
[array([ nan], dtype=float32), nan, nan]
[array([ nan], dtype=float32), nan, nan]
[array([ nan], dtype=float32), nan, nan]
"""

Switching to a different optimization method (Adam):

import tensorflow as tf
import numpy as np

x_data = np.linspace(start=1.0, stop=100.0, num=100)
y_data = np.linspace(start=1.0, stop=100.0, num=100)

x = tf.placeholder(tf.float32)
y = tf.placeholder(tf.float32)

w = tf.Variable(tf.random_normal([1]))
b = tf.Variable(initial_value=1.0)

# b = tf.stop_gradient(b)
predict_y = w * x + b
loss = tf.reduce_sum(tf.square(predict_y-y))
train_op = tf.train.AdamOptimizer(0.01).minimize(loss)
# Equivalent two-step form:
# optimizer = tf.train.AdamOptimizer(0.01)
# gradient = optimizer.compute_gradients(loss)
# train_op = optimizer.apply_gradients(gradient)

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    for _ in range(100):
        for index in range(len(x_data)):
            sess.run(train_op, feed_dict={x: x_data[index], y: y_data[index]})
            print(sess.run([w, b, loss], feed_dict={x: x_data[index], y: y_data[index]}))
"""
.....
[array([ 1.00018632], dtype=float32), -0.01594656, 1.4347606e-06]
[array([ 1.00016069], dtype=float32), -0.015954774, 1.0142103e-06]
[array([ 1.00016844], dtype=float32), -0.015952379, 1.4901161e-08]
[array([ 1.00017369], dtype=float32), -0.015950751, 3.0174851e-07]
[array([ 1.0001514], dtype=float32), -0.015957674, 2.0354637e-06]
[array([ 1.00017929], dtype=float32), -0.015949152, 2.1012966e-06]
[array([ 1.00014257], dtype=float32), -0.01596031, 3.9651641e-06]
[array([ 1.00018048], dtype=float32), -0.015948951, 3.6964193e-06]
[array([ 1.0001328], dtype=float32), -0.015963148, 7.171242e-06]
"""

A summary of different gradient descent algorithms