
Dive into Deep Learning (1): Linear Regression from Scratch

Author: Tyan
Blog: noahsnail.com  |  CSDN  |  Jianshu

Note: these are my notes for Mu Li's course "Dive into Deep Learning".

References

# Import ndarray and autograd from mxnet
from mxnet import autograd
from mxnet import ndarray as nd

Creating the Dataset

# Dimension (number of features) of the training data
num_inputs = 2

# Number of training examples
num_examples = 1000

# True weights w
true_w = [2, -3.4]

# True bias b
true_b = 4.2

# Generate training data X from a normal distribution with mean 0 and variance 1
X = nd.random_normal(shape=(num_examples, num_inputs))
# Generate the corresponding labels y from X, true_w and true_b
y = true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_b
# Add random noise to y
y += 0.01 * nd.random_normal(shape=y.shape)
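The synthetic labels therefore follow the linear model that the rest of the post tries to recover:

y = 2 * x1 - 3.4 * x2 + 4.2 + noise

where the noise is drawn from a standard normal distribution and scaled by 0.01.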

Data Visualization

print(X[0], y[0])
(
[ 1.16307867  0.48380461]
<NDArray 2 @cpu(0)>, 
[ 4.87962484]
<NDArray 1 @cpu(0)>)
%matplotlib inline
import matplotlib.pyplot as plt

# Scatter plot of the data
plt.scatter(X[:, 1].asnumpy(), y.asnumpy())
plt.show()

Figure 1: scatter plot of the second feature X[:, 1] against y

Reading Data

import random

# Batch size used during training
batch_size = 10

# Read data in mini-batches via a generator (yield)
def data_iter():
    # Generate the sample indices
    idx = list(range(num_examples))
    # Shuffle the indices randomly
    random.shuffle(idx)
    # Iterate over one epoch; in Python 2, xrange is more memory-efficient than range
    for i in xrange(0, num_examples, batch_size):
        # Take out the sample indices batch by batch; this also works when
        # num_examples is not divisible by batch_size
        j = nd.array(idx[i:min((i + batch_size), num_examples)])
        # Select the elements at the given indices
        yield nd.take(X, j), nd.take(y, j)
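As a quick illustration of nd.take (toy values, not part of the original notebook): it selects rows of an NDArray along axis 0 by index, which is exactly how the shuffled indices above pick out a mini-batch.

a = nd.array([[1, 2], [3, 4], [5, 6]])
print(nd.take(a, nd.array([2, 0])))  # rows 2 and 0: [[5. 6.], [1. 2.]]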
# Check whether data_iter is a generator function
from inspect import isgeneratorfunction 
print isgeneratorfunction(data_iter)

# data_iter is like a class definition, while data_iter() is like an instance of that class (an anonymous one, of course)
import types 
print isinstance(data_iter(), types.GeneratorType)

# Test reading a batch of data
for data, label in data_iter():
    print(data, label)
    break
True
True
(
[[ 1.18770552 -0.46362698]
 [-3.15577412  2.19352984]
 [-0.45067298 -0.96665388]
 [ 0.05416773 -1.21203637]
 [-1.49418294 -1.61555624]
 [-0.93778831 -1.69338322]
 [ 0.91439158  1.31797135]
 [ 0.82403505  0.33020774]
 [-0.19660901  1.13431609]
 [ 0.15364595  1.01133049]]
<NDArray 10x2 @cpu(0)>, 
[ 8.17057896 -9.57918072  6.58949089  8.41831684  6.69815683  8.08473206
  1.54548573  4.73358202 -0.0632825   1.06603777]
<NDArray 10 @cpu(0)>)

Initializing Model Parameters

# Randomly initialize the weights w
w = nd.random_normal(shape=(num_inputs, 1))
# Initialize the bias b to zero
b = nd.zeros((1,))
# Put w and b into a list
params = [w, b]

# Backpropagation is needed, so attach gradient buffers for automatic differentiation
for param in params:
    param.attach_grad()
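A minimal sketch of how these gradient buffers get filled (toy values; it only uses the same attach_grad / autograd.record / backward calls that appear elsewhere in this post):

# Toy example: z = x1^2 + x2^2, so dz/dx = 2x
x = nd.array([[1.0, 2.0]])
x.attach_grad()              # allocate a gradient buffer for x
with autograd.record():      # record the computation for differentiation
    z = (x * x).sum()
z.backward()                 # fill x.grad with dz/dx
print(x.grad)                # [[2. 4.]]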

Defining the Model

# Define the computation y = Xw + b
def net(X):
    # Use dot for vector/matrix multiplication
    return nd.dot(X, w) + b
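Here nd.dot(X, w) has shape (num_examples, 1), since X is (num_examples, 2) and w is (2, 1); b has shape (1,) and is broadcast across all rows when added.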

Loss Function

# Define the squared loss
def square_loss(yhat, y):
    # Note: reshape y to yhat's shape to avoid unintended automatic broadcasting
    # The loss is the squared difference between the prediction and the true value
    return (yhat - y.reshape(yhat.shape)) ** 2
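Without the reshape, yhat would have shape (batch_size, 1) while y has shape (batch_size,), and the subtraction would broadcast into a (batch_size, batch_size) matrix instead of an element-wise difference. Each entry of the result is simply (yhat_i - y_i)^2.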

Optimization

# Define stochastic gradient descent
def SGD(params, lr):
    # Gradient-descent update for each parameter
    for param in params:
        # Writing into param[:] updates the existing NDArray in place instead of
        # creating a new one; a newly created param would have no gradient attached
        param[:] = param - lr * param.grad
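Each call performs one update step, param = param - lr * param.grad, for every parameter. Note that square_loss returns a vector of per-example losses; in MXNet, calling backward() on a non-scalar NDArray (as the training loop below does) backpropagates its sum, so param.grad holds the gradient summed over the mini-batch and the effective step size grows with the batch size.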

Visualizing the Training Process

# The true underlying function
def real_fn(X):
    return true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_b

# Plot the loss over training iterations, and the predicted vs. true values
def plot(losses, X, sample_size=100):
    xs = list(range(len(losses)))
    # Create two subplots
    fig, (ax1, ax2) = plt.subplots(1, 2)
    # Title of the first subplot
    ax1.set_title('Loss during training')
    # Plot the loss as a solid blue line
    ax1.plot(xs, losses, '-b')
    # Title of the second subplot
    ax2.set_title('Estimated vs Real Function')
    # Plot the predicted values as blue circles
    ax2.plot(X[:sample_size, 0].asnumpy(), net(X[:sample_size, :]).asnumpy(), 'ob', label='Estimated')
    # Plot the true values as green stars
    ax2.plot(X[:sample_size, 0].asnumpy(), real_fn(X[:sample_size, :]).asnumpy(), '*g', label='Real Value')
    # Add the legend
    ax2.legend()
    # Show the figure
    plt.show()

Training
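Besides the per-epoch average loss, the loop below also tracks an exponentially smoothed loss. With smoothing constant c = 0.01 it updates

moving_loss = (1 - c) * moving_loss + c * current_loss
est_loss = moving_loss / (1 - (1 - c) ** niter)

where the division by 1 - (1 - c)^niter corrects the bias towards zero caused by initializing moving_loss at 0, so est_loss is a meaningful estimate even in the first few iterations.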

# Number of training epochs
epochs = 5
# Learning rate
learning_rate = 0.01
# Iteration (batch) counter
niter = 0
# Record the estimated losses for plotting
losses = []
# Exponentially weighted moving average of the loss
moving_loss = 0
# Exponential smoothing constant
smoothing_constant = 0.01

# Training loop
for epoch in xrange(epochs):
    # Accumulated loss over the epoch
    total_loss = 0
    # Iterate over the mini-batches
    for data, label in data_iter():
        # Record the computation graph for autograd
        with autograd.record():
            # Compute the predictions
            output = net(data)
            # Compute the loss
            loss = square_loss(output, label)
        # Backpropagate through the loss to compute the gradients
        loss.backward()
        # Update the parameters with mini-batch stochastic gradient descent
        SGD(params, learning_rate)
        # Accumulate the total loss
        total_loss += nd.sum(loss).asscalar()

        # Track how the moving average of the loss evolves after each batch
        # Increment the iteration counter
        niter += 1
        # Current mini-batch loss
        current_loss = nd.mean(loss).asscalar()
        # Update the moving average of the loss (exponential smoothing)
        moving_loss = (1 - smoothing_constant) * moving_loss + smoothing_constant * current_loss
        # Bias-corrected estimate of the loss
        est_loss = moving_loss / (1 - (1 - smoothing_constant) ** niter)

        # Print progress
        if (niter + 1) % 100 == 0:
            # Record the estimated loss
            losses.append(est_loss)
            print 'Epoch %s, batch %s. Moving average of loss: %s. Average loss: %f' % (epoch, niter, est_loss, total_loss / num_examples)
            plot(losses, X)
Epoch 0, batch 99. Moving average of loss: 0.378590331091. Average loss: 0.625015

Epoch 0

Epoch 1, batch 199. Moving average of loss: 0.10108379838. Average loss: 0.000099

Epoch 1

Epoch 2, batch 299. Moving average of loss: 0.033726038259. Average loss: 0.000099

Epoch 2

Epoch 3, batch 399. Moving average of loss: 0.0120152144263. Average loss: 0.000099

Epoch 3

Epoch 4, batch 499. Moving average of loss: 0.00441111205064. Average loss: 0.000101

Epoch 4

print w 
print true_w
print b
print true_b
[[ 1.99982905]
 [-3.40232825]]
<NDArray 2x1 @cpu(0)>
[2, -3.4]

[ 4.20024347]
<NDArray 1 @cpu(0)>
4.2
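The learned parameters are very close to the true ones: w ≈ (2.000, -3.402) against the true (2, -3.4), and b ≈ 4.200 against the true 4.2.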

Other Learning Rates

learning_rate = 0.001

Epoch 0, batch 99. Moving average of loss: 4.20676625843. Average loss: 5.549237
Epoch 1, batch 199. Moving average of loss: 1.1782055765. Average loss: 0.098550
Epoch 2, batch 299. Moving average of loss: 0.393321947036. Average loss: 0.001857
Epoch 3, batch 399. Moving average of loss: 0.13944143045. Average loss: 0.000127
Epoch 4, batch 499. Moving average of loss: 0.0505110244825. Average loss: 0.000096
learning_rate = 0.1

Epoch 0, batch 99. Moving average of loss: 3.79341099229e+13. Average loss: 26080307360862.457031
Epoch 1, batch 199. Moving average of loss: 1.7174457145e+28. Average loss: 15303785876879711197739352064.000000
Epoch 2, batch 299. Moving average of loss: nan. Average loss: nan
Epoch 3, batch 399. Moving average of loss: nan. Average loss: nan
Epoch 4, batch 499. Moving average of loss: nan. Average loss: nan
learning_rate = 1

Epoch 0, batch 99. Moving average of loss: nan. Average loss: nan
Epoch 1, batch 199. Moving average of loss: nan. Average loss: nan
Epoch 2, batch 299. Moving average of loss: nan. Average loss: nan
Epoch 3, batch 399. Moving average of loss: nan. Average loss: nan
Epoch 4, batch 499. Moving average of loss: nan. Average loss: nan
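In short, with learning_rate = 0.001 training still converges, just more slowly (the average loss only reaches about 1e-4 in the last epoch), while learning_rate = 0.1 and learning_rate = 1 are too large for this problem: the loss blows up within the first epochs and becomes nan.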

Source Code