Dive into Deep Learning (Part 1): Linear Regression from Scratch
Published: 2018-12-31
Author: Tyan
Blog: noahsnail.com | CSDN | Jianshu
Note: this post is my course notes for Mu Li's Dive into Deep Learning course.
# Import ndarray and autograd from mxnet
from mxnet import autograd
from mxnet import ndarray as nd
Creating the dataset
# Dimensionality of the training data (number of features)
num_inputs = 2
# Number of training samples
num_examples = 1000
# True weights w
true_w = [2, -3.4]
# True bias b
true_b = 4.2
# Generate training data X from a normal distribution with mean 0 and variance 1
X = nd.random_normal(shape=(num_examples, num_inputs))
# Generate the corresponding outputs y from X, w and b
y = true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_b
# Add random noise to y
y += 0.01 * nd.random_normal(shape=y.shape)
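For reference, the generating model is y = Xw + b + noise. A minimal sketch of the same computation written as a matrix product (an equivalence check added here, not part of the original notes; w_vec and y_check are names introduced for illustration):
# Vectorized form of the generating model (assumes X, true_w, true_b from above)
w_vec = nd.array(true_w).reshape((2, 1))
y_check = nd.dot(X, w_vec) + true_b
# The first few values should match y up to the 0.01 noise added above
print(y_check[:3].reshape((3,)), y[:3])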
Visualizing the data
print(X[0], y[0])
( [ 1.16307867 0.48380461] <NDArray 2 @cpu(0)>, [ 4.87962484] <NDArray 1 @cpu(0)>)
%matplotlib inline
import matplotlib.pyplot as plt
# Scatter plot of the data
plt.scatter(X[:, 1].asnumpy(), y.asnumpy())
plt.show()
Reading the data
import random
# Mini-batch size used during training
batch_size = 10
# Read mini-batches of data via yield
def data_iter():
    # Indices of the samples
    idx = list(range(num_examples))
    # Shuffle the indices randomly
    random.shuffle(idx)
    # Iterate over one epoch of data
    for i in range(0, num_examples, batch_size):
        # Take a slice of indices; this also works when num_examples is not divisible by batch_size
        j = nd.array(idx[i:min((i + batch_size), num_examples)])
        # Gather the samples at the given indices
        yield nd.take(X, j), nd.take(y, j)
# Check whether data_iter is a generator function
from inspect import isgeneratorfunction
print(isgeneratorfunction(data_iter))
# data_iter is like a class definition, while data_iter() is an instance of it (an anonymous one)
import types
print(isinstance(data_iter(), types.GeneratorType))
# Test reading one batch
for data, label in data_iter():
    print(data, label)
    break
True
True
(
[[ 1.18770552 -0.46362698]
[-3.15577412 2.19352984]
[-0.45067298 -0.96665388]
[ 0.05416773 -1.21203637]
[-1.49418294 -1.61555624]
[-0.93778831 -1.69338322]
[ 0.91439158 1.31797135]
[ 0.82403505 0.33020774]
[-0.19660901 1.13431609]
[ 0.15364595 1.01133049]]
<NDArray 10x2 @cpu(0)>,
[ 8.17057896 -9.57918072 6.58949089 8.41831684 6.69815683 8.08473206
1.54548573 4.73358202 -0.0632825 1.06603777]
<NDArray 10 @cpu(0)>)
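As used above, nd.take gathers the rows of its first argument at the given indices, which is how each mini-batch is assembled. A small illustration (the indices here are arbitrary, chosen only for this sketch):
idx_demo = nd.array([3, 0, 7])
# Rows 3, 0 and 7 of X, an NDArray of shape (3, 2)
print(nd.take(X, idx_demo))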
Initializing model parameters
# Randomly initialize the weights w
w = nd.random_normal(shape=(num_inputs, 1))
# Initialize the bias b to zero
b = nd.zeros((1,))
# Put w and b into a list
params = [w, b]
# Attach gradients so backpropagation can be computed for the parameters
for param in params:
    param.attach_grad()
Defining the model
# Define the operation y = Xw + b
def net(X):
    # Use dot for vector/matrix multiplication
    return nd.dot(X, w) + b
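Since X has shape (num_examples, 2) and w has shape (2, 1), nd.dot produces one prediction per row. A quick shape check (a sanity check added here, not from the original notes):
# Expect a (5, 1) column of predictions for the first five samples
print(net(X[:5]).shape)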
Loss function
# Define the squared loss
def square_loss(yhat, y):
    # Note: reshape y to yhat's shape to avoid unintended automatic broadcasting
    # The loss is the squared difference between the prediction and the true value
    return (yhat - y.reshape(yhat.shape)) ** 2
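The reshape matters because yhat from net() has shape (batch_size, 1) while the labels y have shape (batch_size,). Depending on the MXNet version, subtracting them directly may broadcast to a (batch_size, batch_size) matrix rather than the intended elementwise difference. A minimal sketch of the shapes involved (illustrative zeros only):
yhat_demo = nd.zeros((10, 1))   # predictions have shape (batch, 1)
y_demo = nd.zeros(10)           # labels have shape (batch,)
# Without the reshape, yhat_demo - y_demo may broadcast to (10, 10)
print((yhat_demo - y_demo.reshape(yhat_demo.shape)).shape)   # (10, 1), as intended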
Optimization
# Define stochastic gradient descent
def SGD(params, lr):
    # Gradient-descent step for each parameter
    for param in params:
        # Writing param[:] updates the original param in place instead of creating a new one;
        # a newly created param would not have a gradient attached
        param[:] = param - lr * param.grad
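The slice assignment param[:] = ... is the key detail: it writes the updated values into the existing NDArray, so the object that attach_grad() was called on (and that the params list refers to) keeps being updated. A plain assignment would bind the name to a brand-new NDArray instead. A minimal sketch of the difference (hypothetical values):
p = nd.array([1.0, 2.0])
p.attach_grad()
p_new = p - 0.1    # creates a new NDArray; anything holding a reference to p still sees the old values
p[:] = p - 0.1     # updates p in place, so references to p (and its attached gradient) remain valid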
Visualizing the results
# The true underlying function
def real_fn(X):
    return true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_b
# Plot the loss over training iterations and a scatter plot of predicted vs. true values
def plot(losses, X, sample_size=100):
    xs = list(range(len(losses)))
    # Two subplots
    fig, (ax1, ax2) = plt.subplots(1, 2)
    # Title of the first subplot
    ax1.set_title('Loss during training')
    # Plot the loss as a solid blue line
    ax1.plot(xs, losses, '-b')
    # Title of the second subplot
    ax2.set_title('Estimated vs Real Function')
    # Plot the predictions as blue circles
    ax2.plot(X[:sample_size, 0].asnumpy(), net(X[:sample_size, :]).asnumpy(), 'ob', label='Estimated')
    # Plot the true values as green stars
    ax2.plot(X[:sample_size, 0].asnumpy(), real_fn(X[:sample_size, :]).asnumpy(), '*g', label='Real Value')
    # Draw the legend
    ax2.legend()
    # Show the figure
    plt.show()
Training
# Number of training epochs
epochs = 5
# Learning rate
learning_rate = 0.01
# Number of iterations so far
niter = 0
# Recorded losses
losses = []
# (Weighted) moving average of the loss
moving_loss = 0
# Exponential smoothing constant
smoothing_constant = 0.01
# Training loop
for epoch in range(epochs):
    # Accumulated loss over the epoch
    total_loss = 0
    # Iterate over the mini-batches
    for data, label in data_iter():
        # Record the computation graph for automatic differentiation
        with autograd.record():
            # Forward pass: compute predictions
            output = net(data)
            # Compute the loss
            loss = square_loss(output, label)
        # Backward pass: compute gradients from the loss
        loss.backward()
        # Update the parameters with mini-batch stochastic gradient descent
        SGD(params, learning_rate)
        # Accumulate the total loss
        total_loss += nd.sum(loss).asscalar()

        # Track how the moving average of the loss changes after each batch
        # Increment the iteration counter
        niter += 1
        # Loss of the current batch
        current_loss = nd.mean(loss).asscalar()
        # Exponentially smoothed moving average of the loss
        moving_loss = (1 - smoothing_constant) * moving_loss + smoothing_constant * current_loss
        # Bias-corrected estimate of the loss
        est_loss = moving_loss / (1 - (1 - smoothing_constant) ** niter)

        # Print progress
        if (niter + 1) % 100 == 0:
            # Record the estimated loss
            losses.append(est_loss)
            print('Epoch %s, batch %s. Moving average of loss: %s. Average loss: %f' % (epoch, niter, est_loss, total_loss / num_examples))
plot(losses, X)
Epoch 0, batch 99. Moving average of loss: 0.378590331091. Average loss: 0.625015
Epoch 1, batch 199. Moving average of loss: 0.10108379838. Average loss: 0.000099
Epoch 2, batch 299. Moving average of loss: 0.033726038259. Average loss: 0.000099
Epoch 3, batch 399. Moving average of loss: 0.0120152144263. Average loss: 0.000099
Epoch 4, batch 499. Moving average of loss: 0.00441111205064. Average loss: 0.000101
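A note on the logged "Moving average of loss": moving_loss is an exponentially weighted average of the per-batch losses, and dividing by 1 - (1 - smoothing_constant) ** niter corrects the bias introduced by initializing moving_loss at 0 (the same correction used in Adam). A tiny numeric sketch with illustrative values:
alpha = 0.01
m = 0.0
for t, loss_t in enumerate([5.0, 5.0, 5.0], start=1):
    m = (1 - alpha) * m + alpha * loss_t
    # The raw average m is biased toward 0; the corrected value is ~5.0 from the first step
    print(m, m / (1 - (1 - alpha) ** t))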
print(w)
print(true_w)
print(b)
print(true_b)
[[ 1.99982905]
[-3.40232825]]
<NDArray 2x1 @cpu(0)>
[2, -3.4]
[ 4.20024347]
<NDArray 1 @cpu(0)>
4.2
Other learning rates
The same training loop run with different learning rates: a smaller rate converges more slowly, while a rate that is too large makes the loss diverge to nan.
learning_rate = 0.001
Epoch 0, batch 99. Moving average of loss: 4.20676625843. Average loss: 5.549237
Epoch 1, batch 199. Moving average of loss: 1.1782055765. Average loss: 0.098550
Epoch 2, batch 299. Moving average of loss: 0.393321947036. Average loss: 0.001857
Epoch 3, batch 399. Moving average of loss: 0.13944143045. Average loss: 0.000127
Epoch 4, batch 499. Moving average of loss: 0.0505110244825. Average loss: 0.000096
learning_rate = 0.1
Epoch 0, batch 99. Moving average of loss: 3.79341099229e+13. Average loss: 26080307360862.457031
Epoch 1, batch 199. Moving average of loss: 1.7174457145e+28. Average loss: 15303785876879711197739352064.000000
Epoch 2, batch 299. Moving average of loss: nan. Average loss: nan
Epoch 3, batch 399. Moving average of loss: nan. Average loss: nan
Epoch 4, batch 499. Moving average of loss: nan. Average loss: nan
learning_rate = 1
Epoch 0, batch 99. Moving average of loss: nan. Average loss: nan
Epoch 1, batch 199. Moving average of loss: nan. Average loss: nan
Epoch 2, batch 299. Moving average of loss: nan. Average loss: nan
Epoch 3, batch 399. Moving average of loss: nan. Average loss: nan
Epoch 4, batch 499. Moving average of loss: nan. Average loss: nan