阿新 • • 發佈:2019-02-16
梯度下降是一階迭代最佳化演算法。為了用梯度下降找到函式的區域性最小值,每一步都沿當前位置函式梯度(或近似梯度)的反方向移動,移動量與梯度的大小成正比。反之,如果每一步沿梯度的正方向移動,則會逼近該函式的區域性最大值;這個過程稱為梯度上升。梯度下降也被稱為最陡下降法或最速下降法。(摘自 Wikipedia)
其本質是每次迭代都沿著負梯度方向(也就是函式值下降最快的方向)更新參數。現在假設有如下定義的函式(Rosenbrock 函式:一個用來測試最佳化演算法效能的非凸函式,由 Howard Harry Rosenbrock 在 1960 年提出[1]。也稱為 Rosenbrock 山谷或 Rosenbrock 香蕉函式,也簡稱為香蕉函式):
$$f(x, y) = (1 - x)^2 + 100\,(y - x^2)^2$$
在實現的過程中可以先給出 x, y 的初始值(例如設定為 0, 0),然後計算函式在這個點的梯度,並沿負梯度方向按步長更新 x, y 的值。
這裡給出通過梯度下降法計算上述函式的最小值對應的x 和 y
import numpy as np
def cal_rosenbrock(x1, x2):
    """Evaluate the Rosenbrock function at the point (x1, x2).

    The function is ``(1 - x1)^2 + 100 * (x2 - x1^2)^2``; it evaluates to 0
    at (1, 1).

    :param x1: first coordinate
    :param x2: second coordinate
    :return: the function value at (x1, x2)
    """
    return (1 - x1) ** 2 + 100 * (x2 - x1 ** 2) ** 2
def cal_rosenbrock_prax(x1, x2):
    """Return the partial derivative of the Rosenbrock function w.r.t. x1.

    Differentiating ``(1 - x1)^2 + 100 * (x2 - x1^2)^2`` with respect to x1
    gives ``-2 + 2*x1 - 400 * (x2 - x1^2) * x1``.

    :param x1: first coordinate
    :param x2: second coordinate
    :return: d f / d x1 evaluated at (x1, x2)
    """
    return -2 + 2 * x1 - 400 * (x2 - x1 ** 2) * x1
def cal_rosenbrock_pray(x1, x2):
    """Return the partial derivative of the Rosenbrock function w.r.t. x2.

    Differentiating ``(1 - x1)^2 + 100 * (x2 - x1^2)^2`` with respect to x2
    gives ``200 * (x2 - x1^2)``.

    :param x1: first coordinate
    :param x2: second coordinate
    :return: d f / d x2 evaluated at (x1, x2)
    """
    return 200 * (x2 - x1 ** 2)
def for_rosenbrock_func(max_iter_count=100000, step_size=0.001, tol=0.001):
    """Minimise the Rosenbrock function with plain gradient descent.

    Starts from the point (0, 0) and repeatedly moves against the gradient
    until the function value drops to ``tol`` or below, or until
    ``max_iter_count`` iterations have been performed. The iteration number
    and current function value are printed after every step.

    :param max_iter_count: maximum number of iterations
    :param step_size: factor applied to the gradient for each update
    :param tol: stop once the function value is no longer above this value
    :return: float32 array of shape (2,) holding the final (x, y)
    """
    pre_x = np.zeros((2,), dtype=np.float32)
    # Start above the default tolerance so the loop body is entered.
    loss = 10
    iter_count = 0
    while loss > tol and iter_count < max_iter_count:
        gradient = np.array(
            [
                cal_rosenbrock_prax(pre_x[0], pre_x[1]),
                cal_rosenbrock_pray(pre_x[0], pre_x[1]),
            ],
            dtype=np.float32,
        )
        # Both coordinates are updated from the gradient at the same point.
        pre_x -= step_size * gradient
        loss = cal_rosenbrock(pre_x[0], pre_x[1])  # the minimum value is 0
        print("iter_count: ", iter_count, "the loss:", loss)
        iter_count += 1
    return pre_x
if __name__ == '__main__':
    # Run the Rosenbrock gradient-descent demo when executed as a script.
    w = for_rosenbrock_func()
import numpy as np
def gen_line_data(sample_num=100):
    """Generate noise-free samples of the plane y = 3*x1 + 4*x2.

    x1 is evenly spaced over [0, 9] and x2 over [4, 13].

    :param sample_num: number of samples to generate
    :return: tuple ``(x, y)`` where ``x`` has shape (sample_num, 2) with
        columns (x1, x2) and ``y`` is a 1-D array of length sample_num
    """
    x1 = np.linspace(0, 9, sample_num)
    x2 = np.linspace(4, 13, sample_num)
    x = np.column_stack((x1, x2))
    y = np.dot(x, np.array([3, 4]))  # 1-D array, one target per sample
    return x, y
def bgd(samples, y, step_size=0.01, max_iter_count=10000, tol=0.001):
    """Fit linear weights with batch gradient descent (BGD).

    Every iteration uses all samples: the weights move along the mean of
    ``(y_i - w . x_i) * x_i`` over the whole data set. The iteration number
    and the current loss are printed after every step.

    :param samples: 2-D array-like of shape (sample_num, dim)
    :param y: array-like with one target value per sample
    :param step_size: factor applied to the mean gradient for each update
    :param max_iter_count: maximum number of iterations
    :param tol: stop once the loss is no longer above this value
    :return: float32 array of shape (dim,) with the fitted weights
    :raises ValueError: if ``samples`` is not a non-empty 2-D array or the
        number of targets does not match the number of samples
    """
    samples = np.asarray(samples)
    if samples.ndim != 2 or samples.size == 0:
        raise ValueError("samples must be a non-empty 2-D array")
    sample_num, dim = samples.shape
    y = np.asarray(y).flatten()
    if y.shape[0] != sample_num:
        raise ValueError("y must contain one value per sample")

    w = np.ones((dim,), dtype=np.float32)
    # Start above the default tolerance so the loop body is entered.
    loss = 10
    iter_count = 0
    while loss > tol and iter_count < max_iter_count:
        # Sum of (y_i - w . x_i) * x_i over all samples, computed at once.
        residual = y - samples @ w
        gradient = samples.T @ residual
        w += step_size * gradient / sample_num

        # Squared error of the updated weights, scaled by sample_num * dim.
        loss = np.sum((samples @ w - y) ** 2) / (sample_num * dim)
        print("iter_count: ", iter_count, "the loss:", loss)
        iter_count += 1
    return w
if __name__ == '__main__':
    # Fit the generated plane data with batch gradient descent.
    samples, y = gen_line_data()
    w = bgd(samples, y)
    print(w)  # should be very close to [3, 4]
import numpy as np
def gen_line_data(sample_num=100):
    """Generate noise-free samples of the plane y = 3*x1 + 4*x2.

    x1 is evenly spaced over [0, 9] and x2 over [4, 13].

    :param sample_num: number of samples to generate
    :return: tuple ``(x, y)`` where ``x`` has shape (sample_num, 2) with
        columns (x1, x2) and ``y`` is a 1-D array of length sample_num
    """
    x1 = np.linspace(0, 9, sample_num)
    x2 = np.linspace(4, 13, sample_num)
    x = np.column_stack((x1, x2))
    y = np.dot(x, np.array([3, 4]))  # 1-D array, one target per sample
    return x, y
def sgd(samples, y, step_size=0.01, max_iter_count=10000, tol=0.001):
    """Fit linear weights with per-sample (stochastic-style) gradient descent.

    Each iteration makes one pass over the samples in their given order and
    updates the weights after every single sample, using only that sample's
    gradient ``(y_i - w . x_i) * x_i``. The iteration number and the current
    loss are printed after every pass.

    :param samples: 2-D array-like of shape (sample_num, dim)
    :param y: array-like with one target value per sample
    :param step_size: step length of each update
    :param max_iter_count: maximum number of passes over the data
    :param tol: stop once the loss is no longer above this value
    :return: float32 array of shape (dim,) with the fitted weights
    :raises ValueError: if ``samples`` is not a non-empty 2-D array or the
        number of targets does not match the number of samples
    """
    samples = np.asarray(samples)
    if samples.ndim != 2 or samples.size == 0:
        raise ValueError("samples must be a non-empty 2-D array")
    sample_num, dim = samples.shape
    y = np.asarray(y).flatten()
    if y.shape[0] != sample_num:
        raise ValueError("y must contain one value per sample")

    w = np.ones((dim,), dtype=np.float32)
    # Start above the default tolerance so the loop body is entered.
    loss = 10
    iter_count = 0
    while loss > tol and iter_count < max_iter_count:
        for sample, target in zip(samples, y):
            # Use only the current sample's gradient; carrying a running sum
            # across samples would re-apply earlier gradients on every step.
            residual = target - np.dot(w, sample)
            # Keep the 1/sample_num scaling so a full pass moves the weights
            # roughly as far as one batch step with the same step_size.
            w += step_size * residual * sample / sample_num

        # Squared error of the updated weights, scaled by sample_num * dim.
        loss = np.sum((samples @ w - y) ** 2) / (sample_num * dim)
        print("iter_count: ", iter_count, "the loss:", loss)
        iter_count += 1
    return w
if __name__ == '__main__':
    # Fit the generated plane data with per-sample gradient descent.
    samples, y = gen_line_data()
    w = sgd(samples, y)
    print(w)  # should be very close to [3, 4]
import numpy as np
import random
def gen_line_data(sample_num=100):
    """Generate noise-free samples of the plane y = 3*x1 + 4*x2.

    x1 is evenly spaced over [0, 9] and x2 over [4, 13].

    :param sample_num: number of samples to generate
    :return: tuple ``(x, y)`` where ``x`` has shape (sample_num, 2) with
        columns (x1, x2) and ``y`` is a 1-D array of length sample_num
    """
    x1 = np.linspace(0, 9, sample_num)
    x2 = np.linspace(4, 13, sample_num)
    x = np.column_stack((x1, x2))
    y = np.dot(x, np.array([3, 4]))  # 1-D array, one target per sample
    return x, y
def mbgd(samples, y, step_size=0.01, max_iter_count=10000, batch_size=0.2,
         tol=0.001):
    """Fit linear weights with mini-batch gradient descent (MBGD).

    Every iteration draws a random subset of the samples (without
    replacement) and moves the weights along the mean of
    ``(y_i - w . x_i) * x_i`` over that subset. The iteration number and the
    current loss over all samples are printed after every step.

    :param samples: 2-D array-like of shape (sample_num, dim)
    :param y: array-like with one target value per sample
    :param step_size: factor applied to the mean batch gradient
    :param max_iter_count: maximum number of iterations
    :param batch_size: fraction of the samples drawn per iteration, in (0, 1]
    :param tol: stop once the loss is no longer above this value
    :return: float32 array of shape (dim,) with the fitted weights
    :raises ValueError: if ``samples`` is not a non-empty 2-D array, the
        number of targets does not match the number of samples, or
        ``batch_size`` is outside (0, 1]
    """
    samples = np.asarray(samples)
    if samples.ndim != 2 or samples.size == 0:
        raise ValueError("samples must be a non-empty 2-D array")
    sample_num, dim = samples.shape
    y = np.asarray(y).flatten()
    if y.shape[0] != sample_num:
        raise ValueError("y must contain one value per sample")
    if not 0 < batch_size <= 1:
        raise ValueError("batch_size must be a fraction in (0, 1]")

    w = np.ones((dim,), dtype=np.float32)
    # The number of samples per batch does not change between iterations.
    batch_count = int(np.ceil(sample_num * batch_size))
    # Start above the default tolerance so the loop body is entered.
    loss = 10
    iter_count = 0
    while loss > tol and iter_count < max_iter_count:
        index = random.sample(range(sample_num), batch_count)
        batch_samples = samples[index]
        batch_y = y[index]

        residual = batch_y - batch_samples @ w
        gradient = batch_samples.T @ residual
        # Average over the batch, not the full data set, so the effective
        # step does not shrink with the batch fraction.
        w += step_size * gradient / batch_count

        # Squared error over all samples, scaled by sample_num * dim.
        loss = np.sum((samples @ w - y) ** 2) / (sample_num * dim)
        print("iter_count: ", iter_count, "the loss:", loss)
        iter_count += 1
    return w
if __name__ == <