Python時間序列LSTM預測系列學習筆記(5)-單變數
阿新 • 發佈:2018-12-08
本文是對:
https://machinelearningmastery.com/time-series-forecasting-long-short-term-memory-network-python/
https://blog.csdn.net/iyangdi/article/details/77868744
兩篇博文的學習筆記,兩個博主筆風都很浪,有些細節一筆帶過,本人以謙遜的態度進行了學習和整理,筆記內容都在程式碼的註釋中。有不清楚的可以去原博主文中檢視。
資料集下載:https://datamarket.com/data/set/22r0/sales-of-shampoo-over-a-three-year-period
後期我會補上我的github
這一節對第四節的程式碼進行了升級,進行了30次重複預測,並且對最後得到的RMSE的值求了方差,從而達到對預測效能進行評估的作用
# coding=utf-8
"""Univariate time-series forecasting with a stateful LSTM (shampoo-sales data).

Pipeline: difference the series to make it stationary, frame it as a lag-1
supervised problem, scale to [-1, 1], fit a stateful LSTM, and run walk-forward
validation over the last 12 months. The whole experiment is repeated 30 times
and the per-run RMSEs are summarised (describe() + boxplot) to estimate the
variance of the forecasting performance.
"""
from datetime import datetime  # FIX: `from pandas import datetime` was removed in pandas 1.0
from math import sqrt

from pandas import read_csv
from pandas import concat
from pandas import DataFrame
from pandas import Series
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from matplotlib import pyplot
import numpy


def parser(x):
    """Parse a 'YYYY/MM/DD' date string from the CSV into a datetime."""
    return datetime.strptime(x, '%Y/%m/%d')


def timeseries_to_supervised(data, lag=1):
    """Frame a series as supervised learning.

    The series shifted by 1..lag steps becomes the input columns; the
    unshifted series is the output column. Leading NaNs produced by the
    shift are filled with 0.
    """
    df = DataFrame(data)
    columns = [df.shift(i) for i in range(1, lag + 1)]
    columns.append(df)
    df = concat(columns, axis=1)
    df.fillna(0, inplace=True)  # first `lag` rows are NaN after shifting
    return df


def difference(dataset, interval=1):
    """Return the differenced series dataset[i] - dataset[i-interval] (detrending)."""
    diff = list()
    for i in range(interval, len(dataset)):
        value = dataset[i] - dataset[i - interval]
        diff.append(value)
    return Series(diff)


def inverse_difference(history, yhat, interval=1):
    """Invert differencing: add back the observation `interval` steps from the end.

    history: the original (undifferenced) values; yhat: a differenced forecast.
    """
    return yhat + history[-interval]


def scale(train, test):
    """Scale train and test to [-1, 1] with a scaler fit on the training data only."""
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaler = scaler.fit(train)
    # transform train
    train = train.reshape(train.shape[0], train.shape[1])
    train_scaled = scaler.transform(train)
    # transform test with the same scaler (no leakage from the test set)
    test = test.reshape(test.shape[0], test.shape[1])
    test_scaled = scaler.transform(test)
    return scaler, train_scaled, test_scaled


def invert_scale(scaler, X, value):
    """Invert the [-1, 1] scaling for a single forecast `value` given its input row X."""
    new_row = [x for x in X] + [value]
    array = numpy.array(new_row)
    array = array.reshape(1, len(array))
    inverted = scaler.inverse_transform(array)
    return inverted[0, -1]


def fit_lstm(train, batch_size, nb_epoch, neurons):
    """Fit a stateful LSTM on `train` (last column = target, rest = inputs).

    The network is trained one epoch at a time so that the internal state
    can be reset manually between epochs; shuffle=False preserves the
    temporal order of the samples.
    """
    X, y = train[:, 0:-1], train[:, -1]
    X = X.reshape(X.shape[0], 1, X.shape[1])  # [samples, timesteps, features]
    model = Sequential()
    # stateful=True carries the LSTM state across batches within an epoch
    model.add(LSTM(neurons, batch_input_shape=(batch_size, X.shape[1], X.shape[2]),
                   stateful=True))
    model.add(Dense(1))  # single output node
    # mean-squared-error loss with the Adam optimiser
    model.compile(loss='mean_squared_error', optimizer='adam')
    for i in range(nb_epoch):
        # batch_size samples are consumed per gradient step
        model.fit(X, y, epochs=1, batch_size=batch_size, verbose=0, shuffle=False)
        model.reset_states()
        print("當前計算次數:" + str(i))
    return model


def forcast_lstm(model, batch_size, X):
    """One-step forecast for a single scaled input row X."""
    X = X.reshape(1, 1, len(X))
    yhat = model.predict(X, batch_size=batch_size)
    return yhat[0, 0]


# Load the data.
# NOTE(review): `squeeze=True` was removed in pandas 2.0; use
# read_csv(...).squeeze("columns") on modern pandas.
series = read_csv('data_set/shampoo-sales.csv', header=0, parse_dates=[0],
                  index_col=0, squeeze=True, date_parser=parser)

# Make the series stationary via first-order differencing.
raw_values = series.values
diff_values = difference(raw_values, 1)

# Frame as supervised learning (lag-1 input -> current output).
supervised = timeseries_to_supervised(diff_values, 1)
supervised_values = supervised.values

# Split: everything but the last 12 months for training, last 12 for testing.
train, test = supervised_values[0:-12], supervised_values[-12:]

# Scale to [-1, 1].
scaler, train_scaled, test_scaled = scale(train, test)

# Repeat the experiment to estimate the spread of the RMSE.
repeats = 30
error_scores = list()
for r in range(repeats):
    # training data, batch_size, epoch count, neuron count
    lstm_model = fit_lstm(train_scaled, 1, 100, 4)
    # Prime the LSTM internal state by predicting over the training set once.
    train_reshaped = train_scaled[:, 0].reshape(len(train_scaled), 1, 1)
    lstm_model.predict(train_reshaped, batch_size=1)
    # Walk-forward validation over the test set. With too few epochs the model
    # degenerates to persistence (yesterday's value as today's forecast); only
    # with enough training does it learn anything beyond that.
    predictions = list()
    for i in range(len(test_scaled)):
        # one-step forecast
        X, y = test_scaled[i, 0:-1], test_scaled[i, -1]
        yhat = forcast_lstm(lstm_model, 1, X)
        # invert scaling
        yhat = invert_scale(scaler, X, yhat)
        # invert differencing
        yhat = inverse_difference(raw_values, yhat, len(test_scaled) + 1 - i)
        predictions.append(yhat)
        expected = raw_values[len(train) + i + 1]
        # FIX: message said 'Moth'
        print('Month=%d, Predicted=%f, Expected=%f' % (i + 1, yhat, expected))
    # Performance report for this run.
    rmse = sqrt(mean_squared_error(raw_values[-12:], predictions))
    print('%d) Test RMSE:%.3f' % (r + 1, rmse))
    error_scores.append(rmse)

# Summary statistics over the 30 runs.
results = DataFrame()
results['rmse'] = error_scores
print(results.describe())
results.boxplot()
pyplot.show()