5-3.簡單的線性迴歸
阿新 • • 發佈:2022-04-02
import numpy as np
import matplotlib.pyplot as plt
# Toy data set: five (x, y) sample points.
x = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
y = np.array([1.0, 3.0, 2.0, 3.0, 5.0])

# Scatter the raw samples inside a fixed 0..6 window on both axes.
plt.scatter(x, y)
plt.axis([0, 6, 0, 6])
plt.show()
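用最小二乘法求出a, b:對直線 $y = ax + b$,最小化 $\sum_{i}(y_i - a x_i - b)^2$ 可得 $a = \dfrac{\sum_{i}(x_i - \bar{x})(y_i - \bar{y})}{\sum_{i}(x_i - \bar{x})^2}$,$b = \bar{y} - a\bar{x}$(下方程式碼中的 fenzi、fenmu 分別是 $a$ 的分子與分母)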
# Means of the inputs and targets; the closed-form least-squares solution
# is written in terms of deviations from these means.
x_mean = np.mean(x)
y_mean = np.mean(y)

# Accumulate the numerator (fenzi) and denominator (fenmu) of the slope:
#   a = sum((x_i - x_mean) * (y_i - y_mean)) / sum((x_i - x_mean) ** 2)
fenzi = 0.0
fenmu = 0.0
for x_i, y_i in zip(x, y):
    fenzi += (x_i - x_mean) * (y_i - y_mean)
    fenmu += (x_i - x_mean) ** 2

# Slope, then the intercept chosen so that a * x_mean + b == y_mean.
a = fenzi / fenmu
b = y_mean - a * x_mean
print(a)
print(b)
0.8
0.39999999999999947
# Evaluate the fitted line at every training x.
y_hat = a * x + b

# Overlay the fitted line (red) on the sample scatter, same 0..6 window.
plt.scatter(x, y)
plt.plot(x, y_hat, color="red")
plt.axis([0, 6, 0, 6])
plt.show()

# Predict the target for a new, previously unseen sample.
x_predict = 6
y_predict = a * x_predict + b
y_predict
5.2
使用我們自己封裝的SimpleLinearRegression
在同級目錄下使用pycharm新建工程play_ML
工程裡新建一個python指令碼命名為SimpleLinearRegression,寫入以下程式碼
import numpy as np
from .metrics import r2_score


class SimpleLinearRegression1:
    """Single-feature linear regression fitted with an explicit Python loop.

    After ``fit`` the model is ``y = a_ * x + b_``.
    """

    def __init__(self):
        """Create an unfitted model; ``a_`` and ``b_`` are None until ``fit``."""
        self.a_ = None
        self.b_ = None

    def fit(self, x_train, y_train):
        """Fit slope ``a_`` and intercept ``b_`` by least squares.

        Args:
            x_train: 1-D array of feature values.
            y_train: Target values, same length as ``x_train``.

        Returns:
            self, so the call can be chained or echoed.

        Raises:
            AssertionError: If ``x_train`` is not 1-D, the lengths differ, or
                the sum of squared deviations of ``x_train`` is zero (which
                also covers an empty training set).
        """
        assert x_train.ndim == 1, \
            "the Simple Linear Regression can only solve single feature training data"
        assert len(x_train) == len(y_train), \
            "the size of x_train must be equal to the size of y_train"

        x_mean = np.mean(x_train)
        y_mean = np.mean(y_train)

        # Numerator and denominator of the closed-form slope, accumulated
        # one sample at a time (this class is the non-vectorized variant).
        numerator = 0.0
        denominator = 0.0
        for x_i, y_i in zip(x_train, y_train):
            numerator += (x_i - x_mean) * (y_i - y_mean)
            denominator += (x_i - x_mean) ** 2

        # A zero denominator would otherwise divide by zero or store NaN/inf.
        assert denominator != 0, \
            "x_train must contain at least two distinct values"

        self.a_ = numerator / denominator
        self.b_ = y_mean - self.a_ * x_mean

        return self

    def predict(self, x_predict):
        """Return the predicted targets for a 1-D array of feature values.

        Args:
            x_predict: 1-D array of feature values to predict for.

        Returns:
            A NumPy array with one prediction per element of ``x_predict``.

        Raises:
            AssertionError: If ``x_predict`` is not 1-D or the model has not
                been fitted yet.
        """
        assert x_predict.ndim == 1, \
            "the Simple Linear Regression can only solve single feature training data"
        assert self.a_ is not None and self.b_ is not None, \
            "must be fitted before predict!"

        # Element-wise a_ * x + b_ over the whole array at once.
        return self.a_ * np.asarray(x_predict) + self.b_

    def _predict(self, x_single):
        """Return the prediction for a single feature value ``x_single``."""
        return self.a_ * x_single + self.b_

    def __repr__(self):
        return "SimpleLinearRegression1()"


class SimpleLinearRegression2:
    """Single-feature linear regression fitted with vectorized dot products.

    After ``fit`` the model is ``y = a_ * x + b_``.
    """

    def __init__(self):
        """Create an unfitted model; ``a_`` and ``b_`` are None until ``fit``."""
        self.a_ = None
        self.b_ = None

    def fit(self, x_train, y_train):
        """Fit slope ``a_`` and intercept ``b_`` by least squares.

        Args:
            x_train: 1-D array of feature values.
            y_train: Target values, same length as ``x_train``.

        Returns:
            self, so the call can be chained or echoed.

        Raises:
            AssertionError: If ``x_train`` is not 1-D, the lengths differ, or
                the sum of squared deviations of ``x_train`` is zero (which
                also covers an empty training set).
        """
        assert x_train.ndim == 1, \
            "the Simple Linear Regression can only solve single feature training data"
        assert len(x_train) == len(y_train), \
            "the size of x_train must be equal to the size of y_train"

        x_mean = np.mean(x_train)
        y_mean = np.mean(y_train)

        # Deviations from the mean, computed once and reused in both sums.
        x_dev = x_train - x_mean
        numerator = x_dev.dot(y_train - y_mean)
        denominator = x_dev.dot(x_dev)

        # A zero denominator would otherwise store NaN/inf coefficients.
        assert denominator != 0, \
            "x_train must contain at least two distinct values"

        self.a_ = numerator / denominator
        self.b_ = y_mean - self.a_ * x_mean

        return self

    def predict(self, x_predict):
        """Return the predicted targets for a 1-D array of feature values.

        Args:
            x_predict: 1-D array of feature values to predict for.

        Returns:
            A NumPy array with one prediction per element of ``x_predict``.

        Raises:
            AssertionError: If ``x_predict`` is not 1-D or the model has not
                been fitted yet.
        """
        assert x_predict.ndim == 1, \
            "the Simple Linear Regression can only solve single feature training data"
        assert self.a_ is not None and self.b_ is not None, \
            "must be fitted before predict!"

        # Element-wise a_ * x + b_ over the whole array at once.
        return self.a_ * np.asarray(x_predict) + self.b_

    def _predict(self, x_single):
        """Return the prediction for a single feature value ``x_single``."""
        return self.a_ * x_single + self.b_

    def score(self, x_test, y_test):
        """Return ``r2_score(y_test, predictions)`` for the given test set.

        Args:
            x_test: 1-D array of feature values.
            y_test: Target values passed to ``r2_score`` as the reference.
        """
        y_predict = self.predict(x_test)
        return r2_score(y_test, y_predict)

    def __repr__(self):
        return "SimpleLinearRegression2()"
匯入自定義的迴歸函式
# Import the loop-based model from the play_ML package and fit it on the
# toy samples; fit() returns the model itself, which the notebook echoes.
from play_ML.SimpleLinearRegression import SimpleLinearRegression1
reg1 = SimpleLinearRegression1()
reg1.fit(x, y)
SimpleLinearRegression1()
reg1.predict(np.array([x_predict]))
array([5.2])
reg1.a_
0.8
reg1.b_
0.39999999999999947
# Predictions of the loop-based model on the training inputs.
y_hat1 = reg1.predict(x)

# Draw the samples and reg1's fitted line (red) in the 0..6 window.
plt.scatter(x, y)
plt.plot(x, y_hat1, color="red")
plt.axis([0, 6, 0, 6])
plt.show()
向量化運算 實現SimpleLinearRegression
# Import the dot-product-based model from the play_ML package and fit it on
# the same samples; fit() returns the model itself, which the notebook echoes.
from play_ML.SimpleLinearRegression import SimpleLinearRegression2
reg2 = SimpleLinearRegression2()
reg2.fit(x, y)
SimpleLinearRegression2()
reg2.a_
0.8
reg2.b_
0.39999999999999947
# Predictions of the vectorized model on the training inputs.
y_hat2 = reg2.predict(x)
# Plot the samples together with reg2's own fitted line (the original cell
# plotted y_hat1 from reg1 here, leaving y_hat2 unused).
plt.scatter(x, y)
plt.plot(x, y_hat2, color = "red")
plt.axis([0, 6, 0, 6])
plt.show()
向量化實現的效能測試
# Synthetic benchmark data: one million samples of y = 2x + 3 plus noise.
m = 1000000
# np.random.random draws uniform samples from [0, 1).
big_x = np.random.random(size=m)
# np.random.normal with default arguments adds standard-normal noise.
big_y = big_x * 2.0 + 3 + np.random.normal(size=m)
# IPython %timeit magics: time the loop-based fit against the vectorized fit
# on the same data (these lines only run inside IPython/Jupyter).
%timeit reg1.fit(big_x, big_y)
%timeit reg2.fit(big_x, big_y)
639 ms ± 11 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
7.76 ms ± 197 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
print(reg1.a_)
print(reg1.b_)
2.0045382114974615
2.9982778832590267
print(reg2.a_)
print(reg2.b_)
2.0045382114972243
2.9982778832591452