利用sklearn 中的線性迴歸模型訓練資料 使用到的庫有numpy pandas matplotlib
阿新 • • 發佈:2018-11-02
**利用 sklearn 中的線性迴歸模型訓練資料，使用到的庫有 numpy、pandas、matplotlib**
# Fit a scikit-learn LinearRegression on the ENB2012 spreadsheet.
#
# The script reads 'ENB2012_data.xlsx', uses the first eight columns as
# features and the 'Heating Load' column as the target, fits the model on a
# random 60 % training split, and prints the mean squared error measured on
# that training split.
import pandas as pd
import matplotlib.pyplot as plt

from sklearn import model_selection
from sklearn import preprocessing
# NOTE: LinearRegression must be imported from sklearn.linear_model;
# `from sklearn import LinearRegression` does not work.
from sklearn.linear_model import LinearRegression
# Import the one metric that is used instead of `from sklearn.metrics import *`,
# so the module namespace is not flooded with unrelated names.
from sklearn.metrics import mean_squared_error

filename = 'ENB2012_data.xlsx'

# Column labels assigned to the sheet, in sheet order. They are runtime
# strings, so the spelling is kept exactly as originally written.
columns = [
    'Ralative Compactness',
    'Surface Area',
    'wall area',
    'Roof Area',
    'Overall Heirgt',
    'orientation',
    'glazing area',
    'grazubg area distribution',
    'Heating Load',
    'Cooling Load',
]

# skiprows=1 drops the sheet's own first (header) row and `names` supplies the
# labels above, so the frame contains data rows only and statistics such as
# data.mean() can be computed directly.
# pd.read_excel is the documented public entry point (previously the call went
# through the internal path pd.io.excel.read_excel).
data = pd.read_excel(filename, header=None, names=columns, skiprows=1)

# Features: the first eight columns. Targets: column 8 ('Heating Load') and
# column 9 ('Cooling Load').
x = data.iloc[:, 0:8]
y1 = data.iloc[:, 8]
y2 = data.iloc[:, 9]  # extracted for completeness; not used further below

# Hold out 40 % of the rows for testing. No random_state is given, so the
# split (and therefore every number below) differs from run to run.
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    x, y1, test_size=0.40
)

linreg = LinearRegression()
linregTr = linreg.fit(x_train, y_train)  # fit() returns the fitted estimator

# score() on the training split vs. the held-out split; the latter reflects
# how the model performs on data it has not seen.
decision_score = linregTr.score(x_train, y_train)
predict_score = linregTr.score(x_test, y_test)

# Mean squared error on the training split. Arguments follow the documented
# (y_true, y_pred) order.
y_train_pred = linregTr.predict(x_train)
mean_error = mean_squared_error(y_train, y_train_pred)
print(mean_error)