5-8.實現多元線性迴歸
阿新 • • 發佈:2022-04-03
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
# Load the Boston housing data set bundled with scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 -- confirm the installed version still provides it.
boston_data = datasets.load_boston()
X, y = boston_data.data, boston_data.target

# Keep only the samples whose target is below 50.0; the mask is computed once
# and applied to both the feature matrix and the target vector.
below_cap = y < 50.0
X, y = X[below_cap], y[below_cap]
以下匯入的是自定義模組,實現詳情見「5-5 衡量迴歸演算法的標準」。
# Split the data with the project's own train_test_split helper.
# seed=666 is forwarded to the helper (presumably to make the split
# reproducible -- verify against play_ML.model_selection).
from play_ML.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)
使用我們自己封裝的多元線性迴歸法
使用 PyCharm 在同級目錄下新建工程 play_ML,
在其中新建 Python 指令碼,命名為 LinearRegression.py,
並寫入以下程式碼:
import numpy as np


class LinearRegression:
    """Multivariate linear regression with three training strategies.

    After fitting, ``interception_`` holds the intercept, ``coef_`` the
    per-feature coefficients, and ``_theta`` the full parameter vector
    (intercept first, then the coefficients).
    """

    def __init__(self):
        """Initialize an unfitted Linear Regression model."""
        self.coef_ = None
        self.interception_ = None
        self._theta = None

    def fit_normal(self, X_train, y_train):
        """Train the model on X_train / y_train using the normal equation.

        Returns ``self`` so calls can be chained.
        """
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must fit the size of y_train"

        # Prepend a column of ones so that theta[0] acts as the intercept.
        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        self._theta = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y_train)

        self.interception_ = self._theta[0]
        self.coef_ = self._theta[1:]
        return self

    def fit_gd(self, X_train, y_train, eta=0.01, n_iters=1e4):
        """Train the model on X_train / y_train using batch gradient descent.

        ``eta`` is the learning rate and ``n_iters`` the maximum number of
        update steps; both are honoured (earlier code silently replaced them
        with 0.01 and 1e4). Returns ``self``.
        """
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to the size of y_train"

        def J(theta, X_b, y):
            """Mean squared error of the predictions X_b.dot(theta)."""
            try:
                return np.sum((y - X_b.dot(theta)) ** 2) / len(X_b)
            except (OverflowError, FloatingPointError):
                # Numeric blow-up is reported as an infinite cost.
                return float('inf')

        def dJ(theta, X_b, y):
            """Gradient of J with respect to theta (vectorized form)."""
            return X_b.T.dot(X_b.dot(theta) - y) * 2 / len(y)

        def gradient_descent(X_b, y, initial_theta, eta, n_iters=1e4,
                             epsilon=1e-8):
            theta = initial_theta
            cost = J(theta, X_b, y)
            i_iters = 0
            while i_iters < n_iters:
                theta = theta - eta * dJ(theta, X_b, y)
                new_cost = J(theta, X_b, y)
                # Stop as soon as one step changes the cost by less than
                # epsilon.
                if abs(new_cost - cost) < epsilon:
                    break
                cost = new_cost
                i_iters += 1
            return theta

        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        initial_theta = np.zeros(X_b.shape[1])
        self._theta = gradient_descent(X_b, y_train, initial_theta, eta,
                                       n_iters)

        self.interception_ = self._theta[0]
        self.coef_ = self._theta[1:]
        return self

    def fit_sgd(self, X_train, y_train, n_iters=5, t0=5, t1=50):
        """Train the model on X_train / y_train using stochastic gradient descent.

        ``n_iters`` is the number of full passes over the training set;
        ``t0`` and ``t1`` define the decaying learning rate t0 / (t + t1).
        Returns ``self``.
        """
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to the size of y_train"
        assert n_iters >= 1, \
            "所有樣本至少遍歷一次"

        def dJ_sgd(theta, X_b_i, y_i):
            """Gradient of the squared error of a single sample."""
            return X_b_i.T.dot(X_b_i.dot(theta) - y_i) * 2.

        def sgd(X_b, y, initial_theta, n_iters, t0=5, t1=50):

            def learning_rate(t):
                return t0 / (t + t1)

            theta = initial_theta
            m = len(X_b)
            # Nested loops guarantee every sample is visited: the outer loop
            # counts passes, the inner loop walks one random permutation.
            for i_iters in range(n_iters):
                shuffled_indexes = np.random.permutation(m)
                X_b_new = X_b[shuffled_indexes]
                y_new = y[shuffled_indexes]
                for i in range(m):
                    # Samples are taken directly from the shuffled copies.
                    gradient = dJ_sgd(theta, X_b_new[i], y_new[i])
                    # The learning-rate step counter runs across all passes.
                    theta = theta - learning_rate(i_iters * m + i) * gradient
            return theta

        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        initial_theta = np.zeros(X_b.shape[1])
        self._theta = sgd(X_b, y_train, initial_theta, n_iters, t0, t1)

        self.interception_ = self._theta[0]
        self.coef_ = self._theta[1:]
        return self

    def predict(self, X_predict):
        """Return the predictions for the feature matrix X_predict."""
        assert self.interception_ is not None and self.coef_ is not None, \
            "must be fitted before predicted"
        assert X_predict.shape[1] == len(self.coef_), \
            "the feature number of X_predict must be equal to the X_train"

        X_b = np.hstack([np.ones((len(X_predict), 1)), X_predict])
        return X_b.dot(self._theta)

    def score(self, X_test, y_test):
        """Return the r2_score of the model's predictions on X_test / y_test."""
        # Imported here so that the estimator itself depends only on NumPy;
        # the sibling metrics module is needed for scoring alone.
        from .metrics import r2_score

        y_predict = self.predict(X_test)
        return r2_score(y_test, y_predict)

    def __repr__(self):
        return "LinearRegression()"
匯入自定義的迴歸演算法
# Import the custom regression class from the play_ML package and train it
# with fit_normal. The bare expressions that follow alternate with what
# appear to be their recorded interactive outputs, which are kept verbatim.
from play_ML.LinearRegression import LinearRegression
reg = LinearRegression()
reg.fit_normal(X_train, y_train)
# Recorded output of the call above (it matches the class's __repr__).
LinearRegression()
# Coefficients stored on the model, followed by the recorded output.
reg.coef_
array([-1.12728076e-01, 3.83088307e-02, -4.09966537e-02, 7.27425361e-01,
-1.39378594e+01, 3.37684332e+00, -2.39762421e-02, -1.21315896e+00,
2.73164472e-01, -1.40027977e-02, -8.62432754e-01, 5.37440212e-03,
-3.59762900e-01])
# Intercept stored on the model, followed by the recorded output.
reg.interception_
36.81014683461928
# Score on the held-out test split, followed by the recorded output.
reg.score(X_test, y_test)
0.7989582352420577