1. 程式人生 > >時間序列預測入(二)

時間序列預測入(二)

model left concat mean nal med 分享 color taf

ARIMA預測

# -*- coding: utf-8 -*-
"""
Created on Fri Mar 22 21:03:34 2019

@author: Administrator
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta

# ARIMA forecast of 10-minute passenger inflow at one metro station.
#
# Pipeline: read one CSV per day, count entries (status == 1) per 10-minute
# slot for station 0, pad missing slots with zero, smooth with an EWMA,
# difference once, run stationarity / white-noise diagnostics, inspect
# ACF / PACF, then fit an AR(6) model and plot prediction vs. original.

# statsmodels moved ARIMA to ``statsmodels.tsa.arima.model`` and later removed
# the old ``arima_model`` module; try the new location first and fall back so
# the script also runs on old installations.
from statsmodels.tsa.stattools import acf, pacf, adfuller
from statsmodels.stats.diagnostic import acorr_ljungbox
try:
    from statsmodels.tsa.arima.model import ARIMA
except ImportError:
    from statsmodels.tsa.arima_model import ARIMA

num = 14  # number of daily record files (2019-01-01 .. 2019-01-14)
basepath = 'D:\\pworkspace\\data\\Metro_train\\'
filenames = [
    basepath + 'record_2019-01-' + '{:02d}'.format(day) + '.csv'
    for day in range(1, num + 1)
]

# Per-day inflow counts, one DataFrame (index: time, column: count) per file.
daily_in_counts = []
for filename in filenames:
    df = pd.read_csv(filename)
    # Floor each timestamp to its 10-minute slot: drop the last four
    # characters ("M:SS") and append "0:00".
    df['time'] = pd.to_datetime(df['time'].str[:-4] + '0:00')
    station_in = df[(df['stationID'] == 0) & (df['status'] == 1)]
    del df  # release the raw day before loading the next one
    user_in = (
        station_in.groupby('time')[['userID']]
        .count()
        .rename(columns={'userID': 'count'})
    )
    daily_in_counts.append(user_in)

user_in_all = pd.concat(daily_in_counts, axis=0)
# Collapse any slot that appears in more than one file so the index is unique
# before it is aligned to the full time grid.
user_in_all = user_in_all.groupby(level=0).sum()

# Full 10-minute grid from the first slot of day 1 to 23:50 of the last day.
startdate = datetime(2019, 1, 1, 0, 0, 0)
enddate = startdate + timedelta(days=num - 1, hours=23, minutes=50)
full_index = pd.date_range(start=startdate, end=enddate, freq='10min',
                           name='time')
# Slots with no recorded entries get an explicit count of zero. Reindexing
# touches only the missing slots, unlike a row-wide boolean-mask assignment,
# and leaves a regular, sorted DatetimeIndex for the time-series model.
user_in_all = user_in_all.reindex(full_index, fill_value=0)

user_in_all.plot(figsize=(15, 8))
plt.show()

# Smooth the raw counts with an exponentially weighted moving average.
ts = user_in_all['count']
ts_ewma = pd.DataFrame(ts).ewm(span=60).mean()
ts_ewma.plot(figsize=(15, 8))
plt.show()

# First-order difference of the smoothed series.
ts_diff_1 = ts_ewma.diff(1).dropna(axis=0, how='any')
ts_diff_1 = ts_diff_1['count']

# ADF stationarity test
adf_result = adfuller(ts_diff_1, autolag='AIC')
print('ADF test:', adf_result)

# White-noise (Ljung-Box) test at lag 1
ljungbox_result = acorr_ljungbox(ts_diff_1, lags=1)
print('Ljung-Box test:', ljungbox_result)

# ACF / PACF
lag_acf = acf(ts_diff_1, nlags=50)
lag_pacf = pacf(ts_diff_1, nlags=50)

plt.figure(facecolor='white', figsize=(15, 8))
plt.plot(lag_acf)
plt.show()

plt.figure(facecolor='white', figsize=(15, 8))
plt.plot(lag_pacf)
# Approximate significance bounds at +/- 1.9 / sqrt(N), kept as in the
# original script (the conventional 95% value is 1.96).
bound = 1.9 / np.sqrt(len(ts_diff_1))
plt.axhline(y=-bound, linestyle='--', color='gray')
plt.axhline(y=bound, linestyle='--', color='gray')
plt.show()

# Fit an AR(6) model on the differenced series and predict in-sample.
model = ARIMA(ts_diff_1, order=(6, 0, 0))
ts_predict = model.fit().predict()
# Root-mean-square error over the timestamps both series share.
rmse = np.sqrt(((ts_predict - ts_diff_1) ** 2).mean())
print('RMSE:', rmse)

plt.figure(facecolor='white', figsize=(15, 8))
plt.plot(ts_predict, lw=0.5, color='blue', label='Predict')
plt.plot(ts_diff_1, lw=0.5, color='red', label='Original')
plt.legend(loc='lower right')
plt.show()

運行結果

技術分享圖片

技術分享圖片

技術分享圖片

技術分享圖片

技術分享圖片

時間序列預測入(二)