基於梯度下降法實現線性迴歸演算法
阿新 • • 發佈:2019-02-05
# coding: utf-8 # In[1]: # 資料校驗 def validate(X, Y): if len(X) != len(Y): raise Exception("引數異常") else: m = len(X[0]) for l in X: if len(l) != m: raise Exception("引數異常") if len(Y[0]) != 1: raise Exception("引數異常") # 計算差異值 def calcDiffe(x, y, a): lx = len(x) la = len(a) if lx == la: result = 0 for i in range(lx): result += x[i] * a[i] return y - result elif lx + 1 == la: result = 0 for i in range(lx): result += x[i] * a[i] result += 1 * a[lx] # 加上常數項 return y - result else : raise Exception("引數異常") ## 要求X必須是List集合,Y也必須是List集合 def fit(X, Y, alphas, threshold=1e-6, maxIter=20, addConstantItem=True): import math import numpy as np ## 校驗 validate(X, Y) ## 開始模型構建 l = len(alphas) m = len(Y) n = len(X[0]) + 1 if addConstantItem else len(X[0]) B = [True for i in range(l)] ## 差異性(損失值) J = [np.nan for i in range(l)] # 1. 隨機初始化0值(全部為0), a的最後一列為常數項 a = [[0 for j in range(n)] for i in range(l)] # 2. 開始計算 for times in range(maxIter): for i in range(l): if not B[i]: # 如果當前alpha的值已經計算到最優解了,那麼不進行繼續計算 continue ta = a[i] for j in range(n): alpha = alphas[i] ts = 0 for k in range(m): if j == n - 1 and addConstantItem: ts += alpha*calcDiffe(X[k], Y[k][0], a[i]) * 1 else: ts += alpha*calcDiffe(X[k], Y[k][0], a[i]) * X[k][j] t = ta[j] + ts ta[j] = t ## 計算完一個alpha值的0的損失函式 flag = True js = 0 for k in range(m): js += math.pow(calcDiffe(X[k], Y[k][0], a[i]),2) if js > J[i]: flag = False break; if flag: J[i] = js for j in range(n): a[i][j] = ta[j] else: # 標記當前alpha的值不需要再計算了 B[i] = False ## 計算完一個迭代,當目標函式/損失函式值有一個小於threshold的結束迴圈 r = [0 for j in J if j <= threshold] if len(r) > 0: break # 如果全部alphas的值都結算到最後解了,那麼不進行繼續計算 r = [0 for b in B if not b] if len(r) > 0: break; # 3. 獲取最優的alphas的值以及對應的0值 min_a = a[0] min_j = J[0] min_alpha = alphas[0] for i in range(l): if J[i] < min_j: min_j = J[i] min_a = a[i] min_alpha = alphas[i] print "最優的alpha值為:",min_alpha # 4. 返回最終的0值 return min_a # 預測結果 def predict(X,a): Y = [] n = len(a) - 1 for x in X: result = 0 for i in range(n): result += x[i] * a[i] result += a[n] Y.append(result) return Y # 計算實際值和預測值之間的相關性 def calcRScore(y,py): if len(y) != len(py): raise Exception("引數異常") import math import numpy as np avgy = np.average(y) m = len(y) rss = 0.0 tss = 0 for i in range(m): rss += math.pow(y[i] - py[i], 2) tss += math.pow(y[i] - avgy, 2) r = 1.0 - 1.0 * rss / tss return r # In[2]: import numpy as np import matplotlib as mpl import matplotlib.pyplot as plt import pandas as pd import warnings import sklearn from sklearn.linear_model import LinearRegression,Ridge, LassoCV, RidgeCV, ElasticNetCV from sklearn.preprocessing import PolynomialFeatures from sklearn.pipeline import Pipeline from sklearn.linear_model.coordinate_descent import ConvergenceWarning # In[3]: ## 設定字符集,防止中文亂碼 mpl.rcParams['font.sans-serif']=[u'simHei'] mpl.rcParams['axes.unicode_minus']=False # In[4]: warnings.filterwarnings(action = 'ignore', category=ConvergenceWarning) ## 建立模擬資料 np.random.seed(0) np.set_printoptions(linewidth=1000, suppress=True) N = 10 x = np.linspace(0, 6, N) + np.random.randn(N) y = 1.8*x**3 + x**2 - 14*x - 7 + np.random.randn(N) x.shape = -1, 1 y.shape = -1, 1 # In[5]: plt.figure(figsize=(12,6), facecolor='w') ## 模擬資料產生 x_hat = np.linspace(x.min(), x.max(), num=100) x_hat.shape = -1,1 ## 線性模型 model = LinearRegression() model.fit(x,y) y_hat = model.predict(x_hat) s1 = calcRScore(y, model.predict(x)) print model.score(x,y) ## 自帶R^2輸出 print "模組自帶實現===============" print "引數列表:", model.coef_ print "截距:", model.intercept_ ## 自模型 ma = fit(x,y,np.logspace(-4,-2,100), addConstantItem=True) y_hat2 = predict(x_hat, ma) s2 = calcRScore(y, predict(x,ma)) print "自定義實現模型=============" print "引數列表:", ma ## 開始畫圖 plt.plot(x, y, 'ro', ms=10, zorder=3) plt.plot(x_hat, y_hat, color='#b624db', lw=2, alpha=0.75, label=u'Python模型,$R^2$:%.3f' % s1, zorder=2) plt.plot(x_hat, y_hat2, color='#6d49b6', lw=2, alpha=0.75, label=u'自己實現模型,$R^2$:%.3f' % s2, zorder=1) plt.legend(loc = 'upper left') plt.grid(True) plt.xlabel('X', fontsize=16) plt.ylabel('Y', fontsize=16) plt.suptitle(u'自定義的線性模型和模組中的線性模型比較', fontsize=22) plt.show() # In[6]: from sklearn.ensemble import GradientBoostingRegressor#梯度下降的迴歸 clf = GradientBoostingRegressor() y1 = y.ravel() clf.fit(x,y1) print "自帶梯度下降法R方:", clf.score(x,y1) y_hat3=clf.predict(x_hat) s3=calcRScore(y, clf.predict(x)) ## 開始畫圖 plt.plot(x, y, 'ro', ms=10, zorder=3) plt.plot(x_hat, y_hat, color='#b624db', lw=2, alpha=0.75, label=u'Python模型,$R^2$:%.3f' % s1, zorder=2) plt.plot(x_hat, y_hat2, color='#6d49b6', lw=2, alpha=0.75, label=u'自己實現模型,$R^2$:%.3f' % s2, zorder=1) plt.plot(x_hat, y_hat3, color='#6daaba', lw=2, alpha=0.75, label=u'自帶梯度下降方法,$R^2$:%.3f' % s3, zorder=1) plt.legend(loc = 'upper left') plt.grid(True) plt.xlabel('X', fontsize=16) plt.ylabel('Y', fontsize=16) plt.suptitle(u'自定義的線性模型和模組中的線性模型比較', fontsize=22) plt.show() # In[ ]: