1. 程式人生 > 其它 >Python邏輯迴歸模型應用舉例

Python邏輯迴歸模型應用舉例

取UCI公共測試資料庫中澳大利亞信貸批准資料集作為本例資料集,
其擁有14個特徵(x1~x14)、1個分類標籤y(資料表中的d欄;1--同意貸款,0--不同意貸款),共計690個申請者記錄。

1、資料獲取

import  pandas as pd
data = pd.read_excel('credit.xlsx')
data
x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 d
0 1 22.08 11.460 2 4 4 1.585 0 0 0 1 2 100 1213 0
1 0 22.67 7.000 2 8 4 0.165 0 0 0 0 2 160 1 0
2 0 29.58 1.750 1 4 4 1.250 0 0 0 1 2 280 1 0
3 0 21.67 11.500 1 5 3 0.000 1 1 11 1 2 0 1 1
4 1 20.17 8.170 2 6 4 1.960 1 1 14 0 2 60 159 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
685 1 31.57 10.500 2 14 4 6.500 1 0 0 0 2 0 1 1
686 1 20.67 0.415 2 8 4 0.125 0 0 0 0 2 0 45 0
687 0 18.83 9.540 2 6 4 0.085 1 0 0 0 2 100 1 1
688 0 27.42 14.500 2 14 8 3.085 1 1 1 0 2 120 12 1
689 1 41.00 0.040 2 10 4 0.040 0 1 1 0 1 560 1 1

690 rows × 15 columns

2、訓練樣本與測試樣本劃分

#訓練樣本的特徵資料用x表示,分類標籤用y表示;測試樣本的特徵與標籤分別記為x1,y1
#以前600個資料為訓練資料,後90個為測試資料
x = data.iloc[:600,:14].values
x
array([[1.000e+00, 2.208e+01, 1.146e+01, ..., 2.000e+00, 1.000e+02,
        1.213e+03],
       [0.000e+00, 2.267e+01, 7.000e+00, ..., 2.000e+00, 1.600e+02,
        1.000e+00],
       [0.000e+00, 2.958e+01, 1.750e+00, ..., 2.000e+00, 2.800e+02,
        1.000e+00],
       ...,
       [1.000e+00, 3.492e+01, 2.500e+00, ..., 2.000e+00, 2.390e+02,
        2.010e+02],
       [1.000e+00, 2.408e+01, 8.750e-01, ..., 2.000e+00, 2.540e+02,
        1.951e+03],
       [1.000e+00, 3.733e+01, 6.500e+00, ..., 2.000e+00, 9.300e+01,
        1.000e+00]])
y = data.iloc[:600,14].values
y
array([0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0,
       0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0,
       0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1,
       1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
       0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0,
       0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0,
       0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0,
       0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1,
       1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1,
       1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1,
       1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,
       1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1,
       0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
       1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0,
       1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0,
       1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1,
       0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0,
       1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1,
       0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
       1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0,
       1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1,
       0, 0, 1, 1, 0, 1], dtype=int64)
x1 = data.iloc[600:,:14].values
x1
array([[0.000e+00, 2.075e+01, 9.540e+00, ..., 2.000e+00, 2.000e+02,
        1.001e+03],
       [1.000e+00, 3.667e+01, 3.250e+00, ..., 2.000e+00, 1.020e+02,
        6.400e+02],
       [1.000e+00, 2.258e+01, 1.004e+01, ..., 2.000e+00, 6.000e+01,
        3.970e+02],
       ...,
       [0.000e+00, 1.883e+01, 9.540e+00, ..., 2.000e+00, 1.000e+02,
        1.000e+00],
       [0.000e+00, 2.742e+01, 1.450e+01, ..., 2.000e+00, 1.200e+02,
        1.200e+01],
       [1.000e+00, 4.100e+01, 4.000e-02, ..., 1.000e+00, 5.600e+02,
        1.000e+00]])
y1 = data.iloc[600:,14].values
y1
array([0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1,
       1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0,
       0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1,
       1, 1], dtype=int64)

3、邏輯迴歸分析

#匯入邏輯迴歸模組(LR)
from sklearn.linear_model import LogisticRegression as LR
#利用LR建立邏輯迴歸物件lr
lr = LR(max_iter=3000)
#呼叫lr中的fit()方法進行訓練
lr.fit(x,y)
LogisticRegression(max_iter=3000)
這裡遇到一個問題:若未調高最大迭代次數(max_iter),訓練時會出現收斂警告:STOP: TOTAL NO. of ITERATIONS REACHED LIMIT......extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG

解決辦法
意思是迭代總次數已達到上限(求解器尚未收斂),只需要增加最大迭代次數(max_iter)或對資料進行縮放(標準化)就可以。
將程式碼改為(增加迭代次數):

最大迭代次數(max_iter)預設值為100,把它改為3000即可
lr = LR(max_iter=3000)

#呼叫lr中的score()方法返回模型準確率
r = lr.score(x,y)  #模型準確率(針對訓練資料)
r
0.875
#呼叫lr中的predict()方法,對測試樣本x1進行預測,獲取預測結果
R = lr.predict(x1)
R
array([0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1,
       1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0,
       0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1,
       1, 0], dtype=int64)
#預測準確率
Z = R-y1
Rs=len(Z[Z==0])/len(Z)
Rs
0.8666666666666667


# End-to-end script: fit a logistic regression on the first 600 rows of
# credit.xlsx, then report training accuracy and accuracy on the remaining rows.
import pandas as pd
from sklearn.linear_model import LogisticRegression as LR

data = pd.read_excel('credit.xlsx')

# Rows 0-599 are used for training; everything from row 600 on is held out.
train_rows = data.iloc[:600]
test_rows = data.iloc[600:]

# The first 14 columns are the features; column index 14 is the class label.
x, y = train_rows.iloc[:, :14].values, train_rows.iloc[:, 14].values
x1, y1 = test_rows.iloc[:, :14].values, test_rows.iloc[:, 14].values

# max_iter is raised to 3000 so the solver does not stop at its iteration limit.
lr = LR(max_iter=3000)
lr.fit(x, y)

# Accuracy on the same data the model was trained on.
r = lr.score(x, y)
print('模型準確率(針對訓練資料):', r)

# Predict the held-out rows; a zero difference marks a correct prediction.
R = lr.predict(x1)
Z = R - y1
Rs = int((Z == 0).sum()) / len(Z)
print('預測結果為:', R)
print('預測準確率為:', Rs)
模型準確率(針對訓練資料): 0.875
預測結果為: [0 1 1 1 1 0 0 1 0 1 1 0 1 0 1 1 0 0 0 1 0 1 1 0 1 1 1 0 0 0 0 0 1 0 0 1 0
 0 0 0 0 1 1 0 1 0 1 0 1 1 1 0 0 1 0 0 1 0 0 0 1 0 1 1 0 0 0 0 0 0 1 1 0 1
 0 0 0 0 0 1 0 1 1 0 1 1 0 1 1 0]
預測準確率為: 0.8666666666666667