[佔坑]線性迴歸 Python 程式碼註釋
阿新 • • 發佈:2018-12-17
先佔一個坑,最近幾周在做一個比賽,十一月份回來補
程式碼發上來註釋掉免得又鴿了。。。。
《機器學習實戰》這本書有幾點不適合入門者深入學習的地方是關鍵程式碼沒有給出具體原理,而介紹方式是對機器學習每個基礎演算法進行講解,所以經常在學習幾個章節之間會感覺到斷層,需要其他一些地方自行理解。線性迴歸這章後面的優化方式又講的太多沒講清,我認為這是一個問題,部落格也不好記。最好在一個專門的章節講優化方式。
Linear Regression
計算 ws,拿w和X擬合即可。
from numpy import *
import matplotlib.pyplot as plt
# the same code as before
def local_dataset(filename):
    """Parse a tab-separated data file into feature rows and labels.

    Each line holds the feature columns followed by one label column.

    Args:
        filename: path to a TSV file of floats.

    Returns:
        (datamat, labelmat): list of per-row feature lists, and a flat
        list of float labels.
    """
    # Original code opened the file twice and closed neither handle;
    # read once under a context manager instead.
    with open(filename, 'r') as fr:
        lines = fr.readlines()
    if not lines:  # empty file -> nothing to parse
        return [], []
    # feature count = total columns minus the trailing label column
    num_feat = len(lines[0].split('\t')) - 1
    datamat = []
    labelmat = []
    for line in lines:
        cur_line = line.strip().split('\t')
        datamat.append([float(tok) for tok in cur_line[:num_feat]])
        labelmat.append(float(cur_line[-1]))
    return datamat, labelmat
# traditional linear regression
def stand_regression(x_arr, y_arr):
    """Ordinary least-squares regression via the normal equation.

    Solves ws = (X^T X)^-1 X^T y.

    Args:
        x_arr: list of feature rows (each a list of floats).
        y_arr: flat list of target values.

    Returns:
        Column matrix of fitted weights ``ws``, or ``None`` when
        X^T X is singular and cannot be inverted.
    """
    x_mat = mat(x_arr)
    y_mat = mat(y_arr).T
    xTx = x_mat.T * x_mat
    # A zero determinant means xTx has no inverse; bail out rather
    # than let xTx.I raise.
    if linalg.det(xTx) == 0.0:
        print('This matrix is singular,cannot do inverse')
        return
    ws = xTx.I * (x_mat.T * y_mat)  # weight estimation
    return ws
if __name__ == '__main__':
    # Fit ordinary least squares, then plot the raw points and the
    # fitted line. (Removed a commented-out duplicate plotting block.)
    x_arr, y_arr = local_dataset('ex0.txt')
    ws = stand_regression(x_arr, y_arr)
    x_mat = mat(x_arr)
    y_mat = mat(y_arr)
    y_hat = x_mat * ws  # predictions on the training inputs
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # Scatter the raw data. Column 1 is the real feature; column 0 is
    # presumably the constant 1.0 bias term of ex0.txt — TODO confirm.
    ax.scatter(x_mat[:, 1].flatten().A[0], y_mat.T[:, 0].flatten().A[0])
    # Sort rows by x so the fitted line is drawn left-to-right
    # instead of zig-zagging between unsorted points.
    x_copy = x_mat.copy()
    x_copy.sort(0)
    y_hat = x_copy * ws
    ax.plot(x_copy[:, 1], y_hat)
    plt.show()
Local Weighted Linear Regression
局部加權線性迴歸:對每個資料點 $x_i$ 加權。由於使用的是高斯核,每次計算查詢點 $x_i$ 時,其餘各行都與該行比較距離,從而得到一個權重;因此 $y_i$ 的估計值 $\hat{y}_i$ 不像普通線性迴歸那樣一次性直接得出,而是逐一計算每個 $\hat{y}_i$。
from numpy import *
import matplotlib.pyplot as plt
# the same code as before
def local_dataset(filename):
    """Parse a tab-separated data file into feature rows and labels.

    Each line holds the feature columns followed by one label column.

    Args:
        filename: path to a TSV file of floats.

    Returns:
        (datamat, labelmat): list of per-row feature lists, and a flat
        list of float labels.
    """
    # Original code opened the file twice and closed neither handle;
    # read once under a context manager instead.
    with open(filename, 'r') as fr:
        lines = fr.readlines()
    if not lines:  # empty file -> nothing to parse
        return [], []
    # feature count = total columns minus the trailing label column
    num_feat = len(lines[0].split('\t')) - 1
    datamat = []
    labelmat = []
    for line in lines:
        cur_line = line.strip().split('\t')
        datamat.append([float(tok) for tok in cur_line[:num_feat]])
        labelmat.append(float(cur_line[-1]))
    return datamat, labelmat
# traditional linear regression
def stand_regression(x_arr, y_arr):
    """Ordinary least-squares regression via the normal equation.

    Solves ws = (X^T X)^-1 X^T y.

    Args:
        x_arr: list of feature rows (each a list of floats).
        y_arr: flat list of target values.

    Returns:
        Column matrix of fitted weights ``ws``, or ``None`` when
        X^T X is singular and cannot be inverted.
    """
    x_mat = mat(x_arr)
    y_mat = mat(y_arr).T
    xTx = x_mat.T * x_mat
    # A zero determinant means xTx has no inverse; bail out rather
    # than let xTx.I raise.
    if linalg.det(xTx) == 0.0:
        print('This matrix is singular,cannot do inverse')
        return
    ws = xTx.I * (x_mat.T * y_mat)  # weight estimation
    return ws
# calc each line's weights
def lwlr_ws(cur_point, x_arr, y_arr, k=1.0):
    """Weighted least-squares weights centred on one query point.

    A Gaussian kernel with bandwidth ``k`` weights every training row
    by its distance to ``cur_point``, so nearby rows dominate the fit.
    Solves the weighted normal equation ws = (X^T W X)^-1 X^T W y.

    Returns the weight column matrix, or ``None`` when X^T W X is
    singular.
    """
    x_mat = mat(x_arr)
    y_mat = mat(y_arr).T
    m = shape(x_mat)[0]
    # Diagonal weight matrix: one Gaussian-kernel weight per row.
    weights = mat(eye(m))
    for j in range(m):
        delta = cur_point - x_mat[j, :]
        weights[j, j] = exp(delta * delta.T / (-2.0 * k ** 2))
    xTx = x_mat.T * (weights * x_mat)
    if linalg.det(xTx) == 0.0:
        print('This matrix is singular,cannot do inverse')
        return
    return xTx.I * (x_mat.T * weights * y_mat)
# calc each yhat[i]
def lwlr_yhat(all_point, x_arr, y_arr, k=1.0):
    """Predict a response for every row of ``all_point``.

    Runs one locally weighted regression per query row — each
    prediction gets its own kernel weight matrix — and collects the
    scalar estimates into a flat array.
    """
    m = shape(all_point)[0]
    estimates = zeros(m)
    for idx in range(m):
        row = all_point[idx]
        estimates[idx] = row * lwlr_ws(row, x_arr, y_arr, k)
    return estimates
# plt
def lwlr_plot():
    """Fit LWLR on ex0.txt and plot the data plus the smoothed curve."""
    x_arr, y_arr = local_dataset('ex0.txt')
    x_mat = mat(x_arr)
    y_mat = mat(y_arr)
    # Sort rows by the real feature (column 1) so the curve is drawn
    # left-to-right instead of zig-zagging between unsorted points.
    sort_index = x_mat[:, 1].argsort(0)
    x_sort = x_mat[sort_index][:, 0, :]
    # k=0.01 gives very local fits (follows the data closely).
    yhat = lwlr_yhat(x_mat, x_arr, y_arr, k=0.01)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(x_mat[:, 1].flatten().A[0], y_mat.T[:, 0].flatten().A[0], s=2, c='red')
    ax.plot(x_sort[:, 1], yhat[sort_index])
    # Was fig.show(): that does not start the GUI event loop, so the
    # window closes immediately when run as a script. plt.show() blocks.
    plt.show()
# Script entry point: draw the locally weighted regression demo.
if __name__ == '__main__':
lwlr_plot()