Mxnet基礎知識(二)

阿新 • • 發佈：2020-07-13

1 混合式程式設計

　　深度學習框架中，pytorch採用指令式程式設計，tensorflow採用符號式程式設計。mxnet的gluon則嘗試將指令式程式設計和符號式程式設計結合。

1.1 符號式程式設計和指令式程式設計

　　指令式程式設計更加靈活，便於理解和除錯；符號式程式設計能對程式碼進行優化，執行起來效率更高，如下所示：

　　指令式程式設計：程式碼會根據執行順序，逐行執行

#指令式程式設計

def add(a, b):
    """Return the result of ``a + b``."""
    total = a + b
    return total

def fancy_func(a, b, c, d):
    """Return ``(a + b) + (c + d)``, computed step by step through ``add``.

    This is the imperative version of the example: each statement is
    executed in order as soon as it is reached.
    """
    e = add(a, b)
    f = add(c, d)
    # This assignment was previously split over two lines ("g" followed by
    # "= add(e, f)"), which is a SyntaxError; it must be a single statement.
    g = add(e, f)
    return g

# Run the imperative example: (1 + 2) + (3 + 4). The result is neither stored nor printed here.
fancy_func(1, 2, 3, 4)

　　符號式程式設計：下面程式碼會通過字串的形式傳給compile，compile能看到所有的程式碼，能對程式碼結構和記憶體進行優化，加快程式碼執行效率

#符號式程式設計

def add_str():
    """Return the Python source text that defines ``add(a, b)``."""
    source = '''
def add(a, b):
    return a + b
'''
    return source

def fancy_func_str():
    """Return the Python source text that defines ``fancy_func(a, b, c, d)``.

    The returned text is meant to be concatenated with other snippets and
    handed to ``compile``/``exec``, so it has to be valid Python on its own
    (apart from the ``add`` function it calls).
    """
    # The assignment to ``g`` used to be split over two lines inside this
    # literal ("g" / "= add(e, f)"), which made the generated program fail
    # with a SyntaxError when compiled.
    return '''
def fancy_func(a, b, c, d):
    e = add(a, b)
    f = add(c, d)
    g = add(e, f)
    return g
'''

def evoke_str():
    """Assemble the full program text.

    The result is the source of ``add``, then the source of ``fancy_func``,
    then a statement that calls ``fancy_func(1, 2, 3, 4)`` and prints the
    result.
    """
    pieces = [
        add_str(),
        fancy_func_str(),
        '''
print(fancy_func(1, 2, 3, 4))
''',
    ]
    return "".join(pieces)

# Build the whole program as a single string and show it.
prog = evoke_str()
print(prog)
# compile() receives the complete source at once; exec() then runs the resulting code object.
y = compile(prog, '', 'exec')
exec(y)

mxnet構建網路時除了nn.Block和nn.Sequential外，還有nn.HybridBlock和nn.HybridSequential，實現在構建時通過指令式程式設計方式，程式碼執行時轉變成符號式程式設計。HybridBlock和HybridSequential構建的網路net，通過net.hybridize()可以將網路轉變成符號網路圖（symbolic graph），對程式碼結構進行優化，而且mxnet會快取符號圖，隨後的前向傳遞中重複使用符號圖。

#coding:utf-8
from mxnet.gluon import nn
from mxnet import nd

class HybridNet(nn.HybridBlock):
    """Small hybridizable network: Dense(10) -> relu -> Dense(2).

    ``hybrid_forward`` prints its arguments and the hidden activation so
    that it is visible when the Python body is actually executed.
    """

    def __init__(self, **kwargs):
        super(HybridNet, self).__init__(**kwargs)
        self.hidden = nn.Dense(10)
        self.output = nn.Dense(2)

    def hybrid_forward(self, F, x):
        """Apply the hidden layer with relu, then the output layer.

        ``F`` is the operator namespace supplied by the caller; ``x`` is
        the input passed to the network.
        """
        print('F: ', F)
        print('x: ', x)
        activated = F.relu(self.hidden(x))
        print('hidden: ', activated)
        return self.output(activated)

# Run in the original imperative way first: the Python code executes line by line.
net = HybridNet()
net.initialize()
x = nd.random.normal(shape=(1, 4))
net(x)

# net.hybridize() optimizes the code structure and switches the network to symbolic programming.
net.hybridize()
net(x)

# On this further call the print statements in hybrid_forward produce no output: after hybridize()
# the network has been built as a symbolic graph, mxnet caches that graph and runs it directly
# instead of calling the original Python code again.
net(x)

　　另外，繼承自HybridBlock的網路需要實現的是hybrid_forward()，相比於forward()多了一個引數F。F會根據輸入x的型別選擇執行：若x為mxnet.ndarray，則F呼叫ndarray的方法；若x為mxnet.symbol，則呼叫symbol的方法。

2. 延遲初始化

　　在構建網路時，mxnet支援不指明引數的輸入尺寸，只需指明引數的輸出尺寸。這是通過延遲初始化實現

from mxnet import init, nd
from mxnet.gluon import nn


def getnet():
    """Build and return a two-layer ``nn.Sequential`` network.

    The layers are ``Dense(256, activation='relu')`` followed by
    ``Dense(10)``. No input sizes are given and nothing is initialized here.
    """
    model = nn.Sequential()
    model.add(
        nn.Dense(256, activation='relu'),
        nn.Dense(10),
    )
    return model

# The network has just been built: its parameters are not initialized and hold no concrete values.
net = getnet()
print(1, net.collect_params())   #print(1, net[0].weight.data())

# initialize() has been called, but no input has been seen yet, so the parameters
# still hold no concrete values (initialization is deferred).
net.initialize()
print(2, net.collect_params())  #print(2, net[0].weight.data())

# From the shape of the input x the network infers the shape of each layer's parameters
# and then performs the initialization.
x = nd.random.uniform(shape=(2, 30))
net(x)
print(3, net.collect_params())
print(3, net[0].weight.data())

# On the second run no initialization happens again.
net(x)

　　init提供了許多初始化方法，如下：

init.Zero()               # initialize to the constant 0
init.One()                 # initialize to the constant 1
init.Constant(value=0.05)  # initialize to the constant 0.05
init.Orthogonal()          # initialize as an orthogonal matrix
init.Uniform(scale=0.07)  # random values in (-0.07, 0.07)
init.Normal(sigma=0.01)  # normal distribution with mean 0 and standard deviation 0.01
init.Xavier(magnitude=3)  # Xavier initialization (here with magnitude=3); suited to tanh
init.MSRAPrelu(slope=0.25)  # Kaiming (He) initialization; suited to relu

　　自定義初始化：

# Initialize the first and the second layer with different methods.
# force_reinit: re-initialize regardless of whether the network has already been initialized.
net[0].weight.initialize(init=init.Xavier(), force_reinit=True)
net[1].initialize(init=init.Constant(42), force_reinit=True)

#自定義初始化，需要繼承init.Initializer， 並實現 _init_weight
class MyInit(init.Initializer):
    """Custom initializer implemented by overriding ``_init_weight``.

    Weights are drawn with ``nd.random.uniform(low=-10, high=10)``; entries
    whose absolute value is below 5 are then set to 0.
    """

    def _init_weight(self, name, data):
        print('Init', name, data.shape)
        sampled = nd.random.uniform(low=-10, high=10, shape=data.shape)
        data[:] = sampled
        # Multiplying by the comparison result zeroes entries with |value| < 5
        # and leaves entries with |value| >= 5 unchanged.
        keep_mask = data.abs() >= 5
        data *= keep_mask

# Re-initialize the network with the custom initializer, even though it was initialized before.
net.initialize(MyInit(), force_reinit=True)
# Element 0 of the first layer's weight data; the value is neither printed nor stored here.
net[0].weight.data()[0]

3. 引數和模組命名

　　mxnet網路中的parameter和block都有命名(prefix)， parameter的名字由使用者指定，block的名字由使用者或mxnet自動建立

# A block created with an explicit prefix.
mydense = nn.Dense(100, prefix="mydense_")
print(mydense.prefix)  # output: mydense_
print(mydense.collect_params())    # parameters: mydense_weight, mydense_bias

# Blocks created without a prefix get an automatically generated one.
dense0 = nn.Dense(100)
print(dense0.prefix)      # output: dense0_
print(dense0.collect_params())  # parameters: dense0_weight, dense0_bias

dense1 = nn.Dense(100)
print(dense1.prefix)   # output: dense1_
print(dense1.collect_params())  # parameters: dense1_weight, dense1_bias

　　每一個block都有一個name_scope(), 在其上下文中建立的子block，會採用其名字作為字首，注意下面model0和model1的名字差別

from mxnet import gluon
import mxnet as mx

class Model(gluon.Block):
    """Three Dense(20) layers, each followed by relu.

    All child layers are created inside ``self.name_scope()``; the last one
    is additionally given the explicit prefix ``'mydense_'``.
    """

    def __init__(self, **kwargs):
        super(Model, self).__init__(**kwargs)
        with self.name_scope():
            self.dense0 = gluon.nn.Dense(20)
            self.dense1 = gluon.nn.Dense(20)
            self.mydense = gluon.nn.Dense(20, prefix='mydense_')

    def forward(self, x):
        """Run x through dense0, dense1 and mydense, applying relu after each."""
        hidden = mx.nd.relu(self.dense0(x))
        hidden = mx.nd.relu(self.dense1(hidden))
        out = self.mydense(hidden)
        return mx.nd.relu(out)

# First instance: the child prefixes start with the prefix of model0.
model0 = Model()
model0.initialize()
model0(mx.nd.zeros((1, 20)))
print(model0.prefix)         # output: model0_
print(model0.dense0.prefix)  # output: model0_dense0_
print(model0.dense1.prefix)  # output: model0_dense1_
print(model0.mydense.prefix) # output: model0_mydense_


# Second instance of the same class: every prefix now starts with model1_ instead.
model1 = Model()
model1.initialize()
model1(mx.nd.zeros((1, 20)))
print(model1.prefix)          # output: model1_
print(model1.dense0.prefix)   # output: model1_dense0_
print(model1.dense1.prefix)   # output: model1_dense1_
print(model1.mydense.prefix)  # output: model1_mydense_

　　不同的命名，其儲存的引數名字也會有差別，在儲存和載入模型引數時會引起錯誤，如下所示：

# Saving and loading this way: parameters saved from model0 raise an error when loaded
# into model1. The exception is caught and printed, followed by both parameter sets.
model0.collect_params().save('model.params')
try:
    model1.collect_params().load('model.params', mx.cpu())
except Exception as e:
    print(e)
print(model0.collect_params(), '\n')
print(model1.collect_params())


# Saving and loading this way: parameters saved from model0 load into model1 without error.
model0.save_parameters('model.params')
model1.load_parameters('model.params')
# Show the parameter names stored in the file.
print(mx.nd.load('model.params').keys())

在載入預訓練的模型，進行finetune時，注意名稱空間, 如下所示：

# Load a pretrained model; its last layer is a 1000-class classifier.
alexnet = gluon.model_zoo.vision.alexnet(pretrained=True)
print(alexnet.output)
print(alexnet.output.prefix)

# Replace the last layer with a 100-class classifier for fine-tuning.
# The new layer is created inside alexnet.name_scope(), and only this new layer is initialized.
with alexnet.name_scope():
    alexnet.output = gluon.nn.Dense(100)
alexnet.output.initialize()
print(alexnet.output)

　　Sequential建立的net獲取引數：

from mxnet import init, nd
from mxnet.gluon import nn


# Build and initialize a two-layer Sequential network, then run one forward pass.
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dense(10))
net.initialize()  # Use the default initialization method

x = nd.random.uniform(shape=(2, 20))
net(x)            # Forward computation

# Parameters of each layer.
print(net[0].params)
print(net[1].params)

# Access through attributes.
print(net[1].bias)
print(net[1].bias.data())
print(net[0].weight.grad())
# Access through dictionary-style lookup by parameter name.
print(net[0].params['dense0_weight'])
print(net[0].params['dense0_weight'].data())
# Collect all parameters.
print(net.collect_params())
print(net[0].collect_params())
net.collect_params()['dense1_bias'].data()
# Select parameters with a regular expression.
print(net.collect_params('.*weight'))
print(net.collect_params('dense0.*'))

　　Block建立網路獲取引數：

from mxnet import gluon
import mxnet as mx

class Model(gluon.Block):
    """Block with three Dense(20) children, used to show parameter access.

    The children are created inside ``self.name_scope()``; ``mydense`` is
    also given the explicit prefix ``'mydense_'``.
    """

    def __init__(self, **kwargs):
        super(Model, self).__init__(**kwargs)
        with self.name_scope():
            self.dense0 = gluon.nn.Dense(20)
            self.dense1 = gluon.nn.Dense(20)
            self.mydense = gluon.nn.Dense(20, prefix='mydense_')

    def forward(self, x):
        """Apply dense0, dense1 and mydense in order, with relu after each."""
        first = mx.nd.relu(self.dense0(x))
        second = mx.nd.relu(self.dense1(first))
        third = self.mydense(second)
        return mx.nd.relu(third)

# Create and initialize the model, then run one forward pass on a (1, 20) input of zeros.
model0 = Model()
model0.initialize()
model0(mx.nd.zeros((1, 20)))

# Access through the ordered dictionary _children.
print(model0._children)
print(model0._children['dense0'].weight._data)
print(model0._children['dense0'].bias._data)

# Access by collecting all parameters and looking them up by full name.
print(model0.collect_params()['model0_dense0_weight']._data)
print(model0.collect_params()['model0_dense0_bias']._data)

　 Parameter和ParameterDict

　　gluon.Parameter類能夠建立網路中的引數，gluon.ParameterDict類是字典，建立了parameter name和parameter例項之間的對映，通過ParameterDict也可以建立parameter.

Parameter的使用

class MyDense(nn.Block):
    """Dense layer with relu, built directly from ``gluon.Parameter`` objects.

    ``forward`` computes ``relu(dot(x, weight) + bias)``.
    """

    def __init__(self, units, in_units, **kwargs):
        # units: the number of outputs in this layer
        # in_units: the number of inputs in this layer
        super(MyDense, self).__init__(**kwargs)
        weight_shape = (in_units, units)
        bias_shape = (units,)
        # Create a parameter named "weight" and a parameter named "bias".
        self.weight = gluon.Parameter('weight', shape=weight_shape)
        self.bias = gluon.Parameter('bias', shape=bias_shape)

    def forward(self, x):
        """Return the relu of the affine transform of x."""
        projected = nd.dot(x, self.weight.data())
        shifted = projected + self.bias.data()
        return nd.relu(shifted)


# Stack two MyDense layers: 64 -> 8 -> 1.
net = nn.Sequential()
net.add(MyDense(units=8, in_units=64),
        MyDense(units=1, in_units=8))
# Initialize the parameters one by one on each block that has them.
for block in net:
    if hasattr(block, "weight"):
        block.weight.initialize()
    if hasattr(block, "bias"):
        block.bias.initialize()
# Forward pass on a random (2, 64) input, then show the network structure.
print(net(nd.random.uniform(shape=(2, 64))))
print(net)

ParameterDict使用

# Create a ParameterDict that contains one parameter named param2.
params = gluon.ParameterDict()
# get() is used here to create the parameter with shape (2, 3).
params.get('param2', shape=(2, 3))
print(params)
print(params.keys())
print(params['param2'])

Mxnet基礎知識(二)

Mxnet基礎知識(二)

Docker的基礎知識(二)

JavaScript基礎知識二

Maven 基礎知識(二)依賴機制

c++ 基礎知識(二) 函式

vue基礎知識二

web基礎知識(二)

C# 基礎知識系列- 13 常見類庫介紹（二）日期時間類

php socket網路程式設計基礎知識（二）：socket函式

Java之語言基礎知識（二）

軟體測試基礎知識（二）------------等價類劃分法、邊界值分析法、場景法、錯誤推測法、bug定義/型別/優先順序/生命週期/跟蹤管理

【進階之路】Redis基礎知識兩篇就滿足（二）

Python超全乾貨：【二叉樹】基礎知識大全

Redis學習筆記（二）基礎知識

iOS面試基礎知識（二）

js基礎知識概括（二）

SQL 基礎知識梳理（二） - 查詢基礎

【學習筆記】前端常用基礎知識（二）- Jquery如何獲取祖先元素

js基礎知識筆記二

二、計算機基礎知識詳解

Mxnet基礎知識(二)

相關推薦