
Notes on using RandomForestRegressor & MLPRegressor for time-series data

Data overview: eight explanatory variables and one target value; time-series data.

Data processing: normalized (standardized) data is better suited to the neural network model.

Methods: random forest regression and a neural network model (the latter performed poorly).

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn import metrics

from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import max_error
#---------------------------------------------------------------------------------
# Prepare the data
# Load the data
data = pd.read_csv("yiyao.csv", encoding="utf8", low_memory=False)
#data = pd.read_csv("yiyaobzh.csv", encoding="utf8", low_memory=False)
data.head()
data.info()
# Select the feature columns and the target column
features = data.iloc[:,[2,3,4,5,6,7,8,9]].values
targets = data.iloc[:,1].values
# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(features, targets, test_size=0.3, shuffle=False) # earlier rows form the training set, later rows the test set; do not shuffle (keep time order)
print('X_train shape:{}'.format(X_train.shape))
print('X_test shape:{}'.format(X_test.shape))
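
Because shuffle=False produces only a single chronological split, a walk-forward evaluation is a common way to get a more robust estimate on time-series data. A minimal sketch using sklearn's TimeSeriesSplit (n_splits=5 is an assumed setting, not from the original):

# Walk-forward cross-validation sketch: each fold trains on earlier rows and
# validates on the following block, so the time order is never violated.
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.ensemble import RandomForestRegressor

tscv = TimeSeriesSplit(n_splits=5)
cv_scores = cross_val_score(RandomForestRegressor(), features, targets, cv=tscv, scoring="r2")
print("R^2 per fold:", cv_scores)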

Random forest model

# Random forest model
from sklearn.ensemble import RandomForestRegressor

regr = RandomForestRegressor(#n_estimators=50,
                             #max_features=32,
                             #random_state=1
                                )

regr = regr.fit(X_train,y_train)
# Generate predictions on the test set
y_pred = regr.predict(X_test)
y_pred
# Save the predictions to a file (keep y_pred as an array for the metrics below)
pd.DataFrame(y_pred).to_csv('y_pred.csv', header=True, index=False)
# R^2 score of the model on the test set
score_r = regr.score(X_test,y_test)
print("Random Forest R^2:{}".format(score_r))
# Error metrics on the test set: MAPE, max error, MAE, MSE
print("MAPE:", metrics.mean_absolute_percentage_error(y_test, y_pred))
print("Max error:", metrics.max_error(y_test, y_pred))
print("MAE:", metrics.mean_absolute_error(y_test, y_pred))
print("MSE:", metrics.mean_squared_error(y_test, y_pred))
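
Besides aggregate error metrics, for time-series data it also helps to plot predictions against the actual values over the test period. A minimal sketch reusing y_test and y_pred from above:

# Predicted vs. actual values over the (chronologically ordered) test set
plt.figure(figsize=(15, 5))
plt.plot(y_test, label="actual")
plt.plot(y_pred, label="predicted")
plt.xlabel("Test sample index (time order)")
plt.ylabel("Target value")
plt.legend()
plt.show()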
# Feature importances of the trained forest
#regr.feature_importances_
feature_name=["x1","x2","x3","x4","x5","x6","x7","x8"]
imp=[*zip(feature_name,regr.feature_importances_)]
imp
# Unpack feature names and importance values for plotting
x = [name for name, value in imp]
y = [value for name, value in imp]
    
%matplotlib inline
plt.figure(figsize=(15, 10))

plt.barh(x,y,color='green')
plt.xlabel("Degree of importance")
plt.ylabel("Features")
plt.show()
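
The impurity-based feature_importances_ used above can be biased toward features with many distinct values; permutation importance on the held-out test set is a common cross-check. A minimal sketch (n_repeats=10 and random_state=1 are assumed settings):

# Permutation importance: shuffle each feature in X_test and measure how much
# the R^2 score drops; a larger drop means the feature matters more.
from sklearn.inspection import permutation_importance

perm = permutation_importance(regr, X_test, y_test, n_repeats=10, random_state=1)
for name, value in zip(feature_name, perm.importances_mean):
    print("{}: {:.4f}".format(name, value))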

Neural network model

# Standardize the features (StandardScaler: zero mean, unit variance)
from sklearn.preprocessing import StandardScaler

stdScaler = StandardScaler()

X_train = stdScaler.fit_transform(X_train) # fit the scaler on the training data only
X_test = stdScaler.transform(X_test)       # reuse the training statistics for the test data
from sklearn.neural_network import MLPRegressor
regr = MLPRegressor(
                    random_state=42
                    ,hidden_layer_sizes=(4, 2)
                    ,max_iter=50000
                    #,activation='relu'
                   )
regr = regr.fit(X_train,y_train)
# Generate predictions on the test set
y_pred = regr.predict(X_test)
#y_pred
# R^2 score of the model on the test set
score_r = regr.score(X_test,y_test)
print("MLP R^2:{}".format(score_r))
# Show the model's parameters
regr.get_params()
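
Since the MLP performs poorly here, a common next step is to wrap the scaler and the regressor in a Pipeline (so the scaler is always fit on training folds only) and to search over a few architectures and regularization strengths. A minimal sketch; the grid values below are assumptions, not tuned settings:

# Pipeline + grid search sketch: the StandardScaler inside the Pipeline is
# re-fit on each training fold, so no scaling statistics leak from the
# validation folds. TimeSeriesSplit keeps the folds in chronological order.
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split, GridSearchCV, TimeSeriesSplit

pipe = Pipeline([
    ("scale", StandardScaler()),
    ("mlp", MLPRegressor(random_state=42, max_iter=50000)),
])
param_grid = {
    "mlp__hidden_layer_sizes": [(4, 2), (16,), (32, 16)],
    "mlp__alpha": [1e-4, 1e-3, 1e-2],
}
# Re-split the raw (unscaled) features so the Pipeline handles all scaling itself
X_tr, X_te, y_tr, y_te = train_test_split(features, targets, test_size=0.3, shuffle=False)
search = GridSearchCV(pipe, param_grid, cv=TimeSeriesSplit(n_splits=5), scoring="r2")
search.fit(X_tr, y_tr)
print("Best parameters:", search.best_params_)
print("Held-out R^2:", search.score(X_te, y_te))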