1. 程式人生 > Python實現ridge和lasso

Python實現ridge和lasso

# -*- coding: utf-8 -*-
"""
Created on Mon Nov 12 17:07:16 2018

@author: wp:lasso|ridge 
"""
# Classic iris dataset
from sklearn.datasets import load_iris

iris = load_iris()

# Feature matrix and target vector used by the rest of the script.
data_x, data_y = iris.data, iris.target


# Packages and libraries required by the script.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# train_test_split lives in sklearn.model_selection (available since 0.18);
# the former sklearn.cross_validation module was removed in scikit-learn 0.20.
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.linear_model import Lasso, LassoCV
from sklearn.metrics import mean_squared_error

# Use 70% of the samples for training; the fixed random_state keeps the
# split reproducible between runs.
x_tr, x_te, y_tr, y_te = train_test_split(
    data_x, data_y, train_size=0.7, random_state=22
)
######################ridge########################################
# Fit one ridge model per alpha value on a log-spaced grid and record the
# coefficients of every fit.
alphas = 10 ** np.linspace(-10, 10, 100)
ridge_cofficients = []

for alpha in alphas:
    # fit() returns the estimator itself, so build and train in one step.
    ridge = Ridge(normalize=True, alpha=alpha).fit(x_tr, y_tr)
    ridge_cofficients.append(ridge.coef_)
    
# Plot the relationship between alpha and the regression coefficients
# Select the sans-serif font used for the (Chinese) title and disable the
# Unicode minus sign for tick labels.
plt.rcParams.update({
    'font.sans-serif': ['Microsoft YaHei'],
    'axes.unicode_minus': False,
})
# Set the plotting style
plt.style.use('ggplot')

# Coefficient paths: the recorded coefficients plotted against alpha on a
# logarithmic x axis.
plt.plot(alphas, ridge_cofficients)
plt.xscale('log')
plt.axis('tight')
plt.title(r'alpha係數與嶺迴歸係數的關係')
plt.xlabel('Log Alpha')
plt.ylabel('Cofficients')
plt.show()

# Ridge cross-validation: pick alpha from the grid using 10-fold CV on the
# training data.
# The scorer is named 'neg_mean_squared_error'; the old 'mean_squared_error'
# name was removed in scikit-learn 0.20 and is rejected as an invalid scorer.
ridge_cv = RidgeCV(alphas=alphas, normalize=True,
                   scoring='neg_mean_squared_error', cv=10)
ridge_cv.fit(x_tr, y_tr)
# Best lambda (alpha) value selected by cross-validation.
ridge_best_alpha = ridge_cv.alpha_
# Build a ridge model with the best lambda and evaluate it on the test split.
ridge = Ridge(alpha=ridge_best_alpha, normalize=True)
ridge.fit(x_tr, y_tr)
ridge_predict = ridge.predict(x_te)
# Root-mean-squared error of the ridge predictions on the test split.
rmse = np.sqrt(mean_squared_error(y_te, ridge_predict))

######################lasso##################################



# Lasso cross-validation: pick alpha from the grid using 10-fold CV on the
# training data.
lasso_cv = LassoCV(alphas=alphas, normalize=True, cv=10, max_iter=10000)
lasso_cv.fit(x_tr, y_tr)
# Best lambda (alpha) value selected by cross-validation.
lasso_best_alpha = lasso_cv.alpha_
# A bare expression is only echoed in an interactive session; print it so
# the selected value is also visible when the file runs as a script.
print(lasso_best_alpha)
# Build a lasso model with the best lambda.
lasso = Lasso(alpha=lasso_best_alpha, normalize=True, max_iter=10000)
lasso.fit(x_tr, y_tr)

# Predict on the test split.
lasso_predict = lasso.predict(x_te)

# Root-mean-squared error of the lasso predictions on the test split.
RMSE = np.sqrt(mean_squared_error(y_te, lasso_predict))