
Python: Simple Linear Regression and Regression Significance

1. Data preparation

The data are taken from 《應用迴歸分析》 (Applied Regression Analysis), 4th edition.

## fire loss data
### distance to the fire station (km)
x = [3.4, 1.8, 4.6, 2.3, 3.1, 5.5, 0.7, 3.0, 2.6, 4.3, 2.1, 1.1, 6.1, 4.8, 3.8]
### fire loss (thousand yuan)
y = [26.2, 17.8, 31.3, 23.1, 27.5, 36.0, 14.1, 22.3, 19.6, 31.3, 24.0, 17.3, 43.2, 36.4, 26.1]

Plot a scatter chart to check the basic trend:

import matplotlib.pyplot as plt
plt.style.use('ggplot')

## load a CJK font so the Chinese labels display correctly
from matplotlib.font_manager import FontProperties
font = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=12)

plt.scatter(x, y)
plt.xlabel('距離消防站/千米', fontproperties=font)
plt.ylabel("火災損失/千元", fontproperties=font)
plt.title('火災損失', fontproperties=font)
plt.show()

(Figure: scatter plot of fire loss vs. distance to the fire station)

2. Linear regression with sklearn (overkill for a single variable, but convenient)

import numpy as np
from sklearn.linear_model import LinearRegression

x_in = np.array(x).reshape(-1, 1)   # sklearn expects 2-D arrays of shape (n_samples, n_features)
y_in = np.array(y).reshape(-1, 1)
lreg = LinearRegression()
lreg.fit(x_in, y_in)
y_prd = lreg.predict(x_in)

Output the relevant regression statistics:
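The helper below reports the regression and residual sums of squares, the coefficient of determination, and the F and t statistics. Written out (these formulas match what the code computes; note that it estimates σ̂ as √(SSE/n)):

$$SSR=\sum_i(\hat y_i-\bar y)^2,\qquad SSE=\sum_i(y_i-\hat y_i)^2,\qquad R^2=\frac{SSR}{SSR+SSE}$$

$$F=\frac{SSR/1}{SSE/(n-2)},\qquad L_{xx}=\sum_i(x_i-\bar x)^2,\qquad t=\frac{\hat\beta_1\sqrt{L_{xx}}}{\hat\sigma},\quad \hat\sigma=\sqrt{SSE/n}$$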

#### regression statistics (SSR, SSE, R^2, F, t)
def get_lr_stats(x, y, model):
    from scipy import stats
    message0 = ('一元線性迴歸方程為: ' + '\ty' + '=' + str(model.intercept_[0])
                + ' + ' + str(model.coef_[0][0]) + '*x')
    n = len(x)
    y_prd = model.predict(x)
    Regression = sum((y_prd - np.mean(y))**2)        # regression sum of squares (SSR)
    Residual = sum((y - y_prd)**2)                   # residual sum of squares (SSE)
    R_square = Regression / (Regression + Residual)  # coefficient of determination R^2
    F = (Regression / 1) / (Residual / (n - 2))      # F statistic
    pf = stats.f.sf(F, 1, n - 2)                     # p-value of the F test
    message1 = ('相關係數(R^2): ' + str(R_square[0]) + ';' + '\n'
                + '迴歸分析(SSR): ' + str(Regression[0]) + ';'
                + '\t殘差(SSE): ' + str(Residual[0]) + ';' + '\n'
                + ' F : ' + str(F[0]) + ';' + '\t' + 'pf : ' + str(pf[0]))
    ## t test for the slope
    L_xx = n * np.var(x)                             # L_xx = sum((x - mean(x))^2)
    sigma = np.sqrt(Residual / n)
    t = model.coef_ * np.sqrt(L_xx) / sigma
    pt = stats.t.sf(t, n - 2)                        # p-value of the t test (one-sided)
    message2 = ' t : ' + str(t[0][0]) + ';' + '\t' + 'pt : ' + str(pt[0][0])
    return print(message0 + '\n' + message1 + '\n' + message2)

get_lr_stats(x_in, y_in, lreg)

The output:

>>> get_lr_stats(x_in, y_in, lreg)
一元線性迴歸方程為:     y=10.277928549524688 + 4.91933072677093*x
相關係數(R^2): 0.9234781689805285;
迴歸分析(SSR): 841.7663579806808;     殘差(SSE): 69.75097535265259;
           F : 156.88615963321718;    pf : 1.2478000079204313e-08
           t : 13.45445992541066;     pt : 2.6193260491226123e-09
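The t statistic here (13.45) is larger than the 12.525 that statsmodels reports below because get_lr_stats estimates σ̂ as √(SSE/n), while the conventional unbiased estimate divides by n − 2. A minimal check, reusing the fitted slope and the SSE printed above (a sketch, not part of the original code):

import numpy as np
from scipy import stats

n = len(x)                                    # 15 observations
SSE = 69.75097535265259                       # residual sum of squares printed above
beta1 = lreg.coef_[0][0]                      # fitted slope, about 4.9193
L_xx = np.sum((np.array(x) - np.mean(x))**2)  # same L_xx as n * np.var(x)

sigma_hat = np.sqrt(SSE / (n - 2))            # unbiased residual standard deviation
t_stat = beta1 * np.sqrt(L_xx) / sigma_hat    # about 12.53, matching the statsmodels table
p_two_sided = 2 * stats.t.sf(abs(t_stat), n - 2)
print(t_stat, p_two_sided)

As expected for simple linear regression, t² ≈ F (12.53² ≈ 156.9).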

2-1. The same analysis can also be done with statsmodels

from statsmodels.formula.api import ols
import pandas as pd
data = pd.DataFrame({'x':x, 'y':y})
model = ols('y~x', data).fit()
print(model.summary())

Output:

                            OLS Regression Results
==============================================================================
Dep. Variable:                      y   R-squared:                       0.923
Model:                            OLS   Adj. R-squared:                  0.918
Method:                 Least Squares   F-statistic:                     156.9
Date:                Mon, 24 Sep 2018   Prob (F-statistic):           1.25e-08
Time:                        20:56:18   Log-Likelihood:                -32.811
No. Observations:                  15   AIC:                             69.62
Df Residuals:                      13   BIC:                             71.04
Df Model:                           1
Covariance Type:            nonrobust
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     10.2779      1.420      7.237      0.000       7.210      13.346
x              4.9193      0.393     12.525      0.000       4.071       5.768
==============================================================================
Omnibus:                        2.551   Durbin-Watson:                   1.318
Prob(Omnibus):                  0.279   Jarque-Bera (JB):                1.047
Skew:                          -0.003   Prob(JB):                        0.592
Kurtosis:                       1.706   Cond. No.                         9.13
==============================================================================
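Instead of parsing the printed summary, the individual statistics can be read directly from the fitted results object; the attributes below are standard statsmodels OLSResults attributes:

print(model.params)                  # intercept and slope estimates
print(model.rsquared)                # R^2, about 0.923
print(model.fvalue, model.f_pvalue)  # F statistic and its p-value
print(model.pvalues)                 # t-test p-values for each coefficient
print(model.conf_int())              # 95% confidence intervals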

3. Visualizing the fitted line with scipy.interpolate

from scipy.interpolate import interp1d   # interpolate the fitted values for plotting

linear_interp = interp1d(x, y_prd.transpose()[0], kind='linear')
computed = np.linspace(min(x), max(x), 50)
linear_results = linear_interp(computed)

plt.scatter(x, y, s=8, label='original data')
plt.scatter(x_in, y_prd, c='green', s=9, label='predicted data')
plt.plot(computed, linear_results, label='linear_interp', alpha=0.7, c='orange')
plt.xlabel('距離消防站/千米', fontproperties=font)
plt.ylabel("火災損失/千元", fontproperties=font)
plt.title('火災損失', fontproperties=font)
plt.ylim(0, 50)
plt.legend(loc='upper left')
plt.show()

(Figure: scatter plot with the fitted regression line)
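Since the fitted model is a single straight line, the interpolation step is optional; the same line can also be drawn directly from the fitted intercept and slope, for example:

xs = np.linspace(min(x), max(x), 50)
ys = lreg.intercept_[0] + lreg.coef_[0][0] * xs   # y = 10.278 + 4.919 * x
plt.plot(xs, ys, c='orange', alpha=0.7, label='fitted line')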

4. Residual plot

#### Residual plot
def Residual_plot(x, y, model):
    from matplotlib.font_manager import FontProperties
    font = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=12)
    message = ''
    ycout = '不存在異常值'
    n = len(x)
    y_prd = model.predict(x)
    e = y - y_prd                                # residuals
    sigma = np.std(e)
    ## ZRE: standardized residuals
    zre = e / sigma
    ## SRE: studentized residuals
    L_xx = n * np.var(x)                         # L_xx = sum((x - mean(x))^2)
    hii = 1/n + (x - np.mean(x))**2 / L_xx       # leverage values
    sre = e / (sigma * np.sqrt(1 - hii))
    if (np.abs(sre) > 3).any():
        ycout = x[np.abs(sre) > 3], y[np.abs(sre) > 3]
        message = '異常值: ' + str(ycout)
    else:
        message = '異常值: ' + ycout

    ## plot the residuals with ±2σ and ±3σ reference lines
    mx = float(x.max()) + 1
    plt.scatter(x, e, c='red', s=6)
    plt.plot([0, mx], [2*sigma, 2*sigma], '--', c='green')
    plt.plot([0, mx], [-2*sigma, -2*sigma], '--', c='green')
    plt.plot([0, mx], [3*sigma, 3*sigma], '--', c='orange')
    plt.plot([0, mx], [-3*sigma, -3*sigma], '--', c='orange')
    plt.annotate(message, xy=(0, np.ceil(3*sigma + 1)), xycoords='data', fontproperties=font)
    plt.xlim(0, mx)
    plt.ylim(-np.ceil(3*sigma + 2), np.ceil(3*sigma + 2))
    plt.show()
    return print(message)

Residual_plot(x_in, y_in, lreg)

(Figure: residual plot with ±2σ and ±3σ reference lines)
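As a cross-check, statsmodels can compute the leverage values and studentized residuals directly from the model fitted in section 2-1, via its influence diagnostics (standard OLSInfluence attributes):

infl = model.get_influence()
print(infl.hat_matrix_diag)             # leverage values h_ii
print(infl.resid_studentized_internal)  # internally studentized residuals (SRE)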