時間序列預測之keras

阿新 • • 發佈：2018-12-24

'''
多變數時間序列預測
'''


import os
os.getcwd()
os.chdir('C:\\Users\\87671\\Desktop\\位元魔方')
from pandas import read_csv
from datetime import datetime
from numpy import concatenate
from matplotlib import pyplot
from pandas import concat
from pandas import DataFrame
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
# 把日期轉換成指定格式 年月日小時
def parse(x):
    return datetime.strptime(x, '%Y %m %d %H')

dataset = read_csv('raw.csv', parse_dates=[['year','month','day','hour']],index_col=0,date_parser=parse)
# parse_dates指定時間轉化的列 ,date_parser指定轉化的形式，index_col指定某列作為索引,一箇中括號在一起表示合併在了一起 
dataset.drop('No',axis =1 ,inplace = True)
# 賦予特殊的列名
dataset.columns = ['pollution', 'dew', 'temp', 'press', 'wnd_dir', 'wnd_spd', 'snow', 'rain']
dataset.index.name = 'date'
# 填充NA為0
dataset['pollution'].fillna(0,inplace = True)
# 刪除前24行的資料
dataset = dataset[24:]
print(dataset.head(5))
#dataset.to_csv('pollution.csv')

dataset = read_csv("pollution.csv",header = 0,index_col = 0)

# 畫圖，顯示每個變數的5年資料
values = dataset.values                                      # 首先提取資料框裡面的values
groups = [0,1,2,3,5,6,7]
i = 1
pyplot.figure()
for group in groups:
    pyplot.subplot(len(groups),1,i)                          # 子圖要幾行幾列，每個圖在第幾個
    pyplot.plot(values[:,group])
    pyplot.title(dataset.columns[group],y = 0.5,loc='right')
    i += 1
pyplot.show()


'''
明確監督問題：根據前1個小時的天氣情況x和y，預測下一個階段的y
將時間序列資料集轉化為監督學習問題
基礎知識
1.df['t-1']=df['t'].shift(1) #建立了和t之後一項的序列，同等長度，前面的第一個變成了NA
  df['t-1']=df['t'].shift(-1) # 提前一項，最後一項為nan
'''


def series_to_supervised(data, n_in=1,n_out=1,dropnan = True):
    n_vars = 1 if type(data) is list else data.shape[1]
    df = DataFrame(data)
    cols,names = list(),list()
    # 輸入序列(t-n,...,t-1)
    for i in range(n_in,0,-1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # 預測序列(t ,t+1,...,t+n)
    for i in range(0, n_out):              # 只包括前不包括後，所以range0,1代表只有一個
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # 將他們放在一起
    agg = concat(cols,axis=1)
    agg.columns = names
    # 去除有na的行
    if dropnan:
        agg.dropna(inplace = True)
    return agg

'''
舉例
## 單變數(就只有一個時間變數)
values = [x for x in range(10)]
data = series_to_supervised(values)
print(data)
data = series_to_supervised(values,3)         # 滯後三項預測一個y
print(data)
data = series_to_supervised(values,2,2)      # 滯後2,預測2
print(data)
## 多變數(以2個時間變數為例子)
raw = DataFrame()
raw['ob1'] = [x for x in range(10)]
raw['ob2'] = [x for x in range(50, 60)]
values = raw.values
data = series_to_supervised(values)        # 2個都滯後一項
print(data)
'''
# 將類別變數（第5列）獨熱編碼
encoder = preprocessing.LabelEncoder()
values[:,4] = encoder.fit_transform(values[:,4])
# 確保所有的data是float形式
values = values.astype('float32')
# 標準化壓縮到0-1之間
scaler = preprocessing.MinMaxScaler(feature_range=(0,1)) 
scaled = scaler.fit_transform(values)
###########################################################################timestep=1
# 使用時間序列變監督學習的函式
reframed1 = series_to_supervised(scaled,1,1) 
# 其實我們只需要預測的是第一個變數，所以其他變數對應的時間序列可以刪去
reframed1.drop(reframed1.columns[[9,10,11,12,13,14,15,]], axis = 1,inplace=True)
print(reframed1.head())  #前八個變數時原來資料集的X和Y，最後一個是Y

'''
現在資料準備好了，開始擬合模型
'''
#劃分訓練集測試集（這裡講第一年當做訓練集，其餘4年當做測試集）
values = reframed1.values
n_train_hours = 365*24
train = values[:n_train_hours,:]
test = values[n_train_hours:,:]
#劃分X和Y
train_X,train_y = train[:,:-1],train[:,-1]
test_X,test_y = test[:,:-1],test[:,-1]
#X轉化成LSTM需要的格式[樣本，時間步長，特徵]
train_X = train_X.reshape((train_X.shape[0],1,train_X.shape[1])) 
test_X = test_X.reshape((test_X.shape[0],1,test_X.shape[1]))
print(train_X.shape,train_y.shape,test_X.shape,test_y.shape)
# 設計網路
model = Sequential()
# 50個神經元，X[1]是一個時間步長,X[2]是8個維度
model.add(LSTM(50, input_shape=(train_X.shape[1],train_X.shape[2])))
model.add(Dense(1))
# 平均絕對誤差mae
model.compile(loss='mae',optimizer = 'adam')
# 擬合network
# 50個批量大小為72的訓練時機
history = model.fit(train_X,train_y,epochs=50,batch_size=72,validation_data=(test_X, test_y), verbose=2, shuffle=False)
# plot history
pyplot.plot(history.history['loss'],label='train')
pyplot.plot(history.history['val_loss'],label='test')
pyplot.legend()
pyplot.show()
# 看圖觀察可能過擬合

'''
模型評估
'''
# make a prediction
yhat = model.predict(test_X)
test_X = test_X.reshape((test_X.shape[0], test_X.shape[2]))  # 3W條資料，8個特徵
# invert scaling for forecast
inv_yhat = concatenate((yhat, test_X[:, 1:]), axis=1)       # 拼接成y對應X7個變數的格式
inv_yhat = scaler.inverse_transform(inv_yhat)
inv_yhat = inv_yhat[:,0]                                    # 只選擇第一列也就是我們真正的yhat
# invert scaling for actual
test_y = test_y.reshape((len(test_y), 1))
inv_y = concatenate((test_y, test_X[:, 1:]), axis=1)
inv_y = scaler.inverse_transform(inv_y)
inv_y = inv_y[:,0]
# calculate RMSE
rmse = sqrt(mean_squared_error(inv_y, inv_yhat))           # 計算mse
print('Test RMSE: %.3f' % rmse)



##################################################################################時間跨度為2呢

reframed = series_to_supervised(scaled,2,1) 
'''
不能再像原來那樣刪這些編號啦，可以這樣做
train_X, train_y = train[:, 0:n_obs], train[:, -n_features]
reframed.drop(reframed.columns[[9,10,11,12,13,14,15,]], axis = 1,inplace=True)
'''
'''
現在資料準備好了，開始擬合模型
'''
#劃分訓練集測試集（這裡講第一年當做訓練集，其餘4年當做測試集）
values = reframed.values
n_train_hours = 365*24
train = values[:n_train_hours,:]
test = values[n_train_hours:,:]
#劃分X和Y
# 和step=1不一樣
n_hours = 2                # timestep=2
n_features = 8
n_obs = n_hours * n_features  # step=2
train_X, train_y = train[:, 0:n_obs], train[:, -n_features]
test_X, test_y = test[:, 0:n_obs], test[:, -n_features]
# reshape input to be 3D [samples, timesteps, features]
train_X = train_X.reshape((train_X.shape[0], n_hours, n_features))
#這裡可以檢查train_X和reframed各自什麼樣子，經驗證是我們需要的樣子！
test_X = test_X.reshape((test_X.shape[0], n_hours, n_features))
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)

'''
train_X,train_y = train[:,:-1],train[:,-1]
test_X,test_y = test[:,:-1],test[:,-1]
#X轉化成LSTM需要的格式[樣本，時間不長，特徵]
train_X = train_X.reshape((train_X.shape[0],1,train_X.shape[1]))
test_X = test_X.reshape((test_X.shape[0],1,test_X.shape[1]))
'''

# 設計網路
model = Sequential()
# 50個神經元，X是一個時間步長，8個維度
model.add(LSTM(50, input_shape=(train_X.shape[1],train_X.shape[2])))
model.add(Dense(1))
# 平均絕對誤差mae
model.compile(loss='mae',optimizer = 'adam')
# 擬合network
# 50個批量大小為72的訓練時機
history = model.fit(train_X,train_y,epochs=50,batch_size=72,validation_data=(test_X, test_y), verbose=2, shuffle=False)
# plot history
pyplot.plot(history.history['loss'],label='train')
pyplot.plot(history.history['val_loss'],label='test')
pyplot.legend()
pyplot.show()
# 看圖觀察可能過擬合

'''
模型評估
'''

'''
# make a prediction 返回原來的單位
trainPredict = model.predict(train_X)
testPredict = model.predict(test_X)
trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform([trainY])
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([testY])
import math
trainScore = math.sqrt(mean_squared_error(trainY[0],trainPredict[:,0]))
trainScore = math.sqrt(mean_squared_error(trainY[0],trainPredict[:,0]))
print('Train RMSE: %.3f' % trainScore)         
print('Test RMSE: %.3f' % testScore)
'''


yhat = model.predict(test_X)
test_X = test_X.reshape((test_X.shape[0], test_X.shape[2]*n_hours))  # 3W條資料，16個特徵
# invert scaling for forecast
inv_yhat = concatenate((yhat, test_X[:, 1:8]), axis=1)               # 因為之前標準化之前是8列，所以inverse的時候也要是8列，
                                                                     # 當然我們有用的只是第一列，所以1:8,8:15都可以
inv_yhat = scaler.inverse_transform(inv_yhat)
inv_yhat = inv_yhat[:,0]                                             # 只選擇第一列也就是我們真正的yhat
# invert scaling for actual
test_y = test_y.reshape((len(test_y), 1))
inv_y = concatenate((test_y, test_X[:, 1:8]), axis=1)
inv_y = scaler.inverse_transform(inv_y)
inv_y = inv_y[:,0]
# calculate RMSE
rmse = sqrt(mean_squared_error(inv_y, inv_yhat))                     # 計算mse
print('Test RMSE: %.3f' % rmse)                                      # 26.335

# 看一下預測的曲線（這裡值取了前面50個）
plt.plot(inv_y[0:50],color='green')
plt.plot(inv_yhat[0:50])
plt.show()

時間序列預測之keras

''' 多變數時間序列預測 ''' import os os.getcwd() os.chdir('C:\\Users\\87671\\Desktop\\位元魔方') from pandas import read_csv from datetime import datetime from n

keras-anomaly-detection 代碼分析——本質上就是SAE、LSTM時間序列預測

encoding urn odin forward mean code -a reat ati keras-anomaly-detection Anomaly detection implemented in Keras The source codes of the re

Keras LSTM 時間序列預測

Keras LSTM 時間序列預測 international-airline-passengers.csv資料記錄： time,passengers "1949-01",112 "1949-02",118 "1949-03",132 "1949-04",129

基於Keras的LSTM多變數時間序列預測（學習筆記）

本文翻譯自Jason Brownlee的部落格https://machinelearningmastery.com/multivariate-time-series-forecasting-lstms-keras/ 本部落格主要參考以下中文版部落格 https://blog.csdn.net/qq_280

基於Keras的LSTM多變數時間序列預測（北京PM2.5資料集pollution.csv）

基於Keras的LSTM多變數時間序列預測　　傳統的線性模型難以解決多變數或多輸入問題

深度學習之LSTM時間序列預測

LSTM（long short-term memory,LSTM）:長短時記憶網路，是迴圈神經網路（recurrent neural network,RNN）的一個重要結構，迴圈神經的主要用途是處理和預測序列資料。全連線神經網路（感知機，BP神經網路，RBF 神經網路等）或卷

86、使用Tensorflow實現，LSTM的時間序列預測，預測正弦函數

ati pre win real testing could sqrt sha ima ‘‘‘ Created on 2017年5月21日 @author: weizhen ‘‘‘ # 以下程序為預測離散化之後的sin函數 import numpy as np impo

2017.06.9 金融時間序列分析之Eview使用基礎

file 打開操作 span bject 生成 com 工作表時間序列分析一.創建時間序列工作文件：首先將數據轉換為Eviews系統能夠分析的Eviews Workfile數據集 1.創建工作文件：工作文件結構類型：非結構/非日期型；日期-規則頻率型；平衡面板型；

時間序列預測——深度好文

原文地址：https://medium.com/open-machine-learning-course/open-machine-learning-course-topic-9-time-series-analysis-in-python-a270cb05e0b3 Open Machine Learnin

Tensorflow LSTM時間序列預測的嘗試

一、網上的資源網上有不少用LSTM來預測時間序列的資源，如下面：深度學習（08）_RNN-LSTM迴圈神經網路-03-Tensorflow進階實現 http://blog.csdn.net/u013082989/article/details/73693392 Applying

時間序列預測演算法總結

時間序列演算法 time series data mining 主要包括decompose（分析資料的各個成分，例如趨勢，週期性），prediction（預測未來的值），classification（對有序資料序列的feature提取與分類），clustering（相似數列聚類）等。時間序

LSTM時間序列預測學習

一、檔案準備工作下載好的例程式二、開始執行 1、在程式所在目錄中（chapter_15）開啟終端輸入下面的指令執行 python train_lstm.py 此時出現了報錯提

基於深度學習時間序列預測系統專案需求分析心得

專案第一次迭代已經進入了尾聲，在我們小組剛確定這個專案的時候，花了兩個周的時間來確定專案的需求。以下是我們在進行需求分析的一些心得。需求分析過程：　　(1) 小組內部進行討論：在進行團隊專案開發之初，我們在需求分析還有資料庫設計上花了很多時間，首先是進行多次需求分析的團隊會議，小組人員

ARIMA 時間序列預測

ARIMA 時間序列預測（學習資料及程式碼均從網上獲取。）資料記錄AirPassengers.csv： Month,#Passengers 1949-01,112 1949-02,118 1949-03,132 1949-04,129 1949-05,121 1949

長短期記憶（LSTM）系列_1.1、迴歸神經網路在時間序列預測中的介紹和應用

摘要，通過本文你可以學到：傳統的時間序列預測方法側重於具有線性關係的單變數資料以及固定和手動診斷的時間依賴性。神經網路增加了學習可能的噪聲和非線性關係的能力，其中任意定義但固定數量的輸入和輸出支援多變數和多步預測。遞迴神經網路增加了有序觀察的顯式處理和從上下文學習時間依賴

基於長短期記憶神經網路LSTM的多步長時間序列預測

基於長短期記憶神經網路LSTM的多步長多變數時間序列預測長短時記憶網路(LSTM)是一種能夠學習和預測長序列的遞迴神經網路。LSTMs除了學習長序列外，還可以學習一次多步預測，這對於時間序列的預測非

使用Python為時間序列預測建立ARIMA模型

如何在Python中為時間序列預測建立ARIMA模型 ARIMA模型是一種流行且廣泛使用的

將時間序列預測問題轉換為python中的監督學習問題

像深度學習這樣的機器學習方法可以用於時間序列預測。在機器學習方法可以被使用前，時間序列預測問題必須重新構建成監督學習問題，從一個單純的序列變成一對序列輸入和輸出。在這個教程中，你將瞭解如何將單變數和多變數時間序列預測問題轉換為與機器學習演算法一起使用的監督學習問題

8.4.2 時間序列預測——使用TFLearn自定義模型——程式碼執行錯誤及解決方法

《TensorFlow》：實戰Google深度學習框架中第八章的——8.4.2 時間序列預測——使用TFLearn自定義模型下的原始碼執行報錯：原因分析： score=metrics.accuracy_score(y_test,y_predicted)該句程式碼中y_predi

LSTM時間序列預測及網路層搭建

1. LSTM預測未來一年某航空公司的客運流量這裡的問題是：給你一個數據集，只有一列資料，這是一個關於時間序列的資料，從這個時間序列中預測未來一年某航空公司的客運流量。資料形式： time passengers 0 1949-01

時間序列預測之keras

相關推薦