1. 程式人生 > 利用 sklearn 中的線性迴歸模型訓練資料（使用到的庫有 numpy、pandas、matplotlib）

利用sklearn 中的線性迴歸模型訓練資料 使用到的庫有numpy pandas matplotlib

**利用 sklearn 中的線性迴歸模型訓練資料（使用到的庫有 numpy、pandas、matplotlib）**

import pandas as pd 
import matplotlib.pyplot as plt 
 
# Excel workbook to load. The path is relative, so it is resolved against
# the current working directory.
filename = 'ENB2012_data.xlsx'

# Labels assigned to the sheet's columns, in order (ten names).
# NOTE(review): several labels are misspelled ('Ralative', 'Heirgt',
# 'grazubg'). They are left untouched because they are runtime column keys
# that other code may look up by name -- rename them everywhere at once.
columns = [
    'Ralative Compactness', 'Surface Area', 'wall area', 'Roof Area',
    'Overall Heirgt', 'orientation', 'glazing area',
    'grazubg area distribution', 'Heating Load', 'Cooling Load',
]

# Load through the documented top-level entry point ``pd.read_excel`` rather
# than reaching into the ``pd.io.excel`` submodule.
# skiprows=1 discards the first row of the sheet (per the original author's
# note, the file's own unused header row), header=None tells pandas that no
# remaining row is a header, and names=columns supplies the labels above.
# With the header row gone every row is data, so statistics such as the mean
# can be computed on the frame directly.
data = pd.read_excel(filename, header=None, names=columns, skiprows=1)
# print(data)
# data_std=data.std()
# data_mean=data.mean()
# data_median=data.median()
# print('std\n',data_std)
# print('mean\n',data_mean)
# print('median\n',data_median)


# data.plot(kind='scatter',x=['Surface Area','Surface Area'],y=['Heating Load','Cooling Load'],title='scartter',grid=True)
# #這個相當於在一張圖中疊加畫了起來,並不是一張圖上面顯示兩個散點圖
# plt.show()




# from matplotlib import animation#動態圖所需要的包
# import numpy as  np
# fig,ax = plt.subplots()#子影象
# x = np.arange(0,2*np.pi,0.01)
# line, = ax.plot(x,np.sin(x))

# def animate(i):
#     line.set_ydata(np.sin(x+i/10))#用來改變的y對應的值
#     return line,
# def init():
#     line.set_ydata(np.sin(x))#動態圖初始影象
#     return line,

# ani = animation.FuncAnimation(fig=fig,func=animate,init_func=init,interval=20)#動態作圖的方法,func動態圖函式,init_func初始化函式,interval指影象改變的時間間隔
# plt.show()


# ax1=plt.subplot(2,1,1)
# ax1.scatter(data['Surface Area'],data['Heating Load'],marker='*',label='sandiantu1')
# ax2=plt.subplot(2,1,2)
# ax2.scatter(data['Surface Area'],data['Cooling Load'],marker='*',label='sandiantu2')
# plt.show()






# x=data.iloc[:,0:8].values.astype(float)
# y1=data.iloc[:,8].values.astype(float)
# y2=data.iloc[:,9].values.astype(float)
# x_train,x_test,y_train,y_test=model_selection.train_test_split(x,y1,test_size=0.40)
# linreg = LinearRegression()#建立線性化模型
# linregTr = linreg.fit(x_train,y_train)#訓練集 訓練出的模型
# y_train_pred = linregTr.predict(x_train)#模型預測
# y_test_pred = linregTr.predict(x_test)#測試集模型測試


# mse_train = metrics.mean_squared_error(y_train,y_train_pred)
# mse_test = metrics.mean_squared_error(y_test,y_test_pred)
# decision_score = linregTr.score(x_train,y_train)
# predict_score = linregTr.score(x_test,y_test)#預測之後的準確度打分
# print(data)
# print('This is trained data\'s mean_squared_error:',mse_train)
# print('This is tested data\'s mean_squared_error:',mse_test)
# print('This is decision_score:',decision_score)
# print('This is predict_score:',predict_score)



from sklearn import model_selection 
from sklearn import preprocessing 
# from sklearn import LinearRegression  # does not work: LinearRegression lives in sklearn.linear_model, not in the top-level package
from sklearn.linear_model import LinearRegression


# Split the frame by column position: the first eight columns are the model
# inputs, column 8 is the target fitted below ('Heating Load' in `columns`).
x = data.iloc[:, 0:8]
y1 = data.iloc[:, 8]
# Column 9 ('Cooling Load' in `columns`); presumably a second target. It is
# extracted here but not used by the model fitted below.
y2 = data.iloc[:, 9]

# Hold out 40% of the rows for testing. random_state pins the shuffle so the
# split -- and therefore both scores below -- is the same on every run;
# without it each execution reports different numbers.
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    x, y1, test_size=0.40, random_state=42,
)

linreg = LinearRegression()
# fit() returns the estimator itself, so linregTr and linreg are the same
# fitted object; both names are kept because later code uses them.
linregTr = linreg.fit(x_train, y_train)

# score() is evaluated once on the rows the model was trained on and once on
# the held-out rows; the latter reflects real-world performance.
decision_score = linregTr.score(x_train, y_train)
predict_score = linregTr.score(x_test, y_test)


# decision_score_pred=linregTr.predict(x_train)#準確度預測
# predict_score_pred=linregTr.predict(x_test)
# x_train_predict=linreg.predict(x_train)#預測的過程中的引數變化過程
# x_test_predict=linreg.predict(x_test)
# print(decision_score_pred)
# print(predict_score)
# print(predict_score)
# print(decision_score)
# print(predict_score)


# from sklearn import metrics
from sklearn.metrics import *
# Predictions for the training rows, used to measure the training error.
y_train_pred = linregTr.predict(x_train)

# mean_squared_error is documented as (y_true, y_pred): pass the ground truth
# first. Plain MSE gives the same value either way, but keeping the
# documented order avoids silent mistakes if sample weights or an asymmetric
# metric are introduced later.
mean_error = mean_squared_error(y_train, y_train_pred)
print(mean_error)