1. 程式人生 > 其它 > 第八章 財政收入預測分析

第八章 財政收入預測分析

第八章 財政收入預測分析

實訓

實訓1 求取企業所得稅各特徵間的相關係數

import numpy as np
import pandas as pd
# Load the income-tax data set and print the pairwise Pearson correlation of
# every column, rounded to two decimal places.
inputfile = './income_tax.csv'
data = pd.read_csv(inputfile)
corr_matrix = data.corr(method='pearson')
print('相關係數矩陣為:', np.round(corr_matrix, 2))
相關係數矩陣為:       year    x1    x2    x3    x4    x5    x6    x7    x8    x9   x10     y
year  1.00  1.00  0.98  0.97  0.97  0.95 -0.89  0.95  0.98  0.93  0.97  0.99
x1    1.00  1.00  0.99  0.98  0.98  0.95 -0.89  0.97  0.99  0.93  0.98  0.99
x2    0.98  0.99  1.00  0.99  0.98  0.92 -0.89  0.99  0.98  0.90  0.99  0.98
x3    0.97  0.98  0.99  1.00  0.96  0.92 -0.91  0.99  0.98  0.91  0.99  0.97
x4    0.97  0.98  0.98  0.96  1.00  0.90 -0.82  0.95  0.97  0.92  0.96  0.99
x5    0.95  0.95  0.92  0.92  0.90  1.00 -0.91  0.88  0.96  0.92  0.94  0.95
x6   -0.89 -0.89 -0.89 -0.91 -0.82 -0.91  1.00 -0.87 -0.91 -0.84 -0.92 -0.88
x7    0.95  0.97  0.99  0.99  0.95  0.88 -0.87  1.00  0.97  0.86  0.98  0.95
x8    0.98  0.99  0.98  0.98  0.97  0.96 -0.91  0.97  1.00  0.95  0.99  0.99
x9    0.93  0.93  0.90  0.91  0.92  0.92 -0.84  0.86  0.95  1.00  0.91  0.95
x10   0.97  0.98  0.99  0.99  0.96  0.94 -0.92  0.98  0.99  0.91  1.00  0.98
y     0.99  0.99  0.98  0.97  0.99  0.95 -0.88  0.95  0.99  0.95  0.98  1.00

實訓2 選取企業所得稅預測關鍵特徵

from sklearn.linear_model import Lasso
# Fit a Lasso regression of `y` on the ten feature columns (positions 1-10 of
# `data`) and print the fitted coefficients rounded to five decimals.
# NOTE(review): the `normalize` argument was deprecated in scikit-learn 1.0
# and removed in 1.2 -- confirm the installed version still accepts it.
features = data.iloc[:, 1:11]
target = data['y']
lasso = Lasso(normalize=True, max_iter=10000)
lasso.fit(features, target)
rounded_coef = np.round(lasso.coef_, 5)
print('相關係數為:', rounded_coef)
相關係數為: [ 1.61300000e-02 -1.05300000e-02 -5.01000000e-03  3.87999184e+03
 -1.66000000e-03 -9.72594842e+03  2.42900000e-02 -3.23400000e-02
  7.08000000e-03  8.41000000e-03]
# Count how many Lasso coefficients are non-zero.
nonzero_count = np.sum(lasso.coef_ != 0)
print('相關係數非零個數為:', nonzero_count)
相關係數非零個數為: 10
# Boolean mask with one entry per fitted feature: True where the Lasso
# coefficient is non-zero.
mask = (lasso.coef_ != 0)
print('相關係數是否為0:', mask)
相關係數是否為0: [ True  True  True  True  True  True  True  True  True  True]
# Keep only the columns whose Lasso coefficient is non-zero and save them.
outputfile = './new_reg_data1.csv'
# `mask` has one entry per Lasso feature, i.e. per column of
# data.iloc[:, 1:11]. Pad it with True on BOTH sides so that it lines up with
# every column of `data`: the first column (year) and the last column (y) are
# always kept. Appending both pads at the end would shift the mask by one
# column relative to the features it describes.
mask = np.concatenate(([True], mask, [True]))
new_reg_data = data.iloc[:, mask]
# The row index is written too, so the file gains a leading unnamed column.
new_reg_data.to_csv(outputfile)
print('輸出資料的維度為:', new_reg_data.shape)
輸出資料的維度為: (12, 12)
mask
array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True])

實訓3 構建企業所得稅預測模型

import numpy as np
import pandas as pd
from GM11 import GM11
inputfile='./income_tax.csv'
data=pd.read_csv(inputfile)
inputfile1='./new_reg_data1.csv'
data1=pd.read_csv(inputfile1)
data1
    Unnamed: 0  year        x1        x2        x3     x4        x5     x6        x7      x8        x9       x10        y
0            0  2004  12113416  18895479  10092421  559.6   2075416  31.99   3733922   80922   1053156   2690984   236416
1            1  2005  14859261  21627825  11751668  554.5   3184744  29.87   4785787  167217   1154425   3005475   268360
2            2  2006  17880638  25453413  13489283  566.1   3981959  30.69   5459314  154958   1434440   3384477   326556
3            3  2007  20452183  29787941  15191582  575.2   4048305  31.63   6331382  186678   3621757   4088545   373397
4            4  2008  24415160  35118425  16963824  582.1   5388451  28.95   6870406  219390   4196301   4767231   455820
5            5  2009  28257805  41646681  18633437  599.0   7531147  24.88   7507109  376839   7068265   8389925   596693
6            6  2010  32278717  48903250  21055373  633.1   6930269  30.85   8754491  458096  17829885   8431405   756412
7            7  2011  34051588  55607710  26598516  612.8   7791165  23.16  10134050  485760  17019222  11076649   732282
8            8  2012  40022658  65574525  32635731  632.4  10312744  20.42  12805288  653736  26192835  13991612   935248
9            9  2013  45769763  76419207  34122005  664.7   9585263  22.55  15613171  668043  21639131  15351387  1061594
10          10  2014  47206504  86167948  37583868  677.3   8256048  20.90  17417072  703733  21396742  15796804  1075045
11          11  2015  52273431  99643373  44545508  680.7  11053751  19.70  21828895  877889  22659148  20881374  1155923
data
    year        x1        x2        x3     x4        x5     x6        x7      x8        x9       x10        y
0   2004  12113416  18895479  10092421  559.6   2075416  31.99   3733922   80922   1053156   2690984   236416
1   2005  14859261  21627825  11751668  554.5   3184744  29.87   4785787  167217   1154425   3005475   268360
2   2006  17880638  25453413  13489283  566.1   3981959  30.69   5459314  154958   1434440   3384477   326556
3   2007  20452183  29787941  15191582  575.2   4048305  31.63   6331382  186678   3621757   4088545   373397
4   2008  24415160  35118425  16963824  582.1   5388451  28.95   6870406  219390   4196301   4767231   455820
5   2009  28257805  41646681  18633437  599.0   7531147  24.88   7507109  376839   7068265   8389925   596693
6   2010  32278717  48903250  21055373  633.1   6930269  30.85   8754491  458096  17829885   8431405   756412
7   2011  34051588  55607710  26598516  612.8   7791165  23.16  10134050  485760  17019222  11076649   732282
8   2012  40022658  65574525  32635731  632.4  10312744  20.42  12805288  653736  26192835  13991612   935248
9   2013  45769763  76419207  34122005  664.7   9585263  22.55  15613171  668043  21639131  15351387  1061594
10  2014  47206504  86167948  37583868  677.3   8256048  20.90  17417072  703733  21396742  15796804  1075045
11  2015  52273431  99643373  44545508  680.7  11053751  19.70  21828895  877889  22659148  20881374  1155923
# Relabel the 12 rows with the years 2004..2015 so they can be addressed by
# year via .loc below.
data1.index=range(2004,2016)
data1.index
RangeIndex(start=2004, stop=2016, step=1)
# Blank out the 2014 and 2015 rows; their feature columns are refilled below
# with grey-model forecasts.
data1.loc[2014] = None
data1.loc[2015] = None
l = ['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x9', 'x10']
for i in l:
    # Fit the model on the 2004-2013 observations only. `.values` replaces the
    # deprecated `.as_matrix()`, as the pandas FutureWarning recommends.
    # GM11 comes from a local module; element [0] of its result is used as the
    # forecasting function -- presumably f(k) is the k-th value (1-based) of
    # the fitted series; confirm against GM11.py.
    f = GM11(data1.loc[range(2004, 2014), i].values)[0]
    # Ten training years, so step 11 is 2014 and step 12 is 2015. Using the
    # same step for both years would give two identical forecasts.
    data1.loc[2014, i] = f(len(data1) - 1)
    data1.loc[2015, i] = f(len(data1))
    data1[i] = data1[i].round(2)
outputfile = './data1_GM11.xls'

# Restore the observed target column from the original data set (the 2014 and
# 2015 rows of `data1` were blanked before forecasting), then save the table
# and show the two forecast rows.
y = list(data['y'].values)
data1['y'] = y
data1.to_excel(outputfile)
forecast_rows = data1.loc[2014:2015, :]
print('預測結果:', forecast_rows)
預測結果:       Unnamed: 0  year           x1           x2           x3      x4  \
2014         NaN   NaN  52340607.88  89248500.46  40098282.29  668.71   
2015         NaN   NaN  52340607.88  89248500.46  40098282.29  668.71   

               x5     x6           x7        x8           x9          x10  \
2014  12106141.48  21.42  17086599.52  892880.8  45963285.04  20247419.64   
2015  12106141.48  21.42  17086599.52  892880.8  45963285.04  20247419.64   

            y  
2014  1075045  
2015  1155923  


C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:5: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.
  """
#支援向量機迴歸預測模型
import pandas as pd
import numpy as np
from sklearn.svm import LinearSVR
import matplotlib.pyplot as plt
from sklearn.metrics import explained_variance_score,\
mean_absolute_error,mean_squared_error,\
median_absolute_error,r2_score
# Train a linear support-vector regressor on the standardized features and
# write the de-standardized predictions next to the observed values.
inputfile = './data1_GM11.xls'
data = pd.read_excel(inputfile)
feature = ['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x9', 'x10']

# Standardize every column (z-score) using the statistics of rows 0..11.
data_train = data.loc[range(0, 12)].copy()
data_mean = data_train.mean()
data_std = data_train.std()
data_train = (data_train - data_mean) / data_std

# `.values` replaces the deprecated `.as_matrix()`, as the pandas
# FutureWarning recommends.
x_train = data_train[feature].values
y_train = data_train['y'].values

# NOTE(review): the recorded run emitted a liblinear ConvergenceWarning with
# the default settings; consider raising `max_iter` if that matters.
linearsvr = LinearSVR()
linearsvr.fit(x_train, y_train)

# Predict for every row, then undo the standardization of the target.
x = ((data[feature] - data_mean[feature]) / data_std[feature]).values
data[u'y_pred'] = linearsvr.predict(x) * data_std['y'] + data_mean['y']

outputfile = './data1_GM11_revenue.xls'
data.to_excel(outputfile)
print('真實值與預測值分別為:\n', data[['y', 'y_pred']])
真實值與預測值分別為:
           y        y_pred
0    236416  2.367047e+05
1    268360  2.683600e+05
2    326556  3.274633e+05
3    373397  3.794810e+05
4    455820  4.558120e+05
5    596693  5.968044e+05
6    756412  7.534051e+05
7    732282  7.322462e+05
8    935248  9.158043e+05
9   1061594  1.061308e+06
10  1075045  1.155385e+06
11  1155923  1.155385e+06


C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:17: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:18: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\svm\base.py:929: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  "the number of iterations.", ConvergenceWarning)
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:22: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.
# Draw the observed and predicted series in two stacked subplots, with a tick
# on every second row label, and print the returned axes objects.
axes = data[['y', 'y_pred']].plot(
    subplots=True,
    style=['b-o', 'r-*'],
    xticks=data.index[::2],
)
print('預測圖:', axes)
預測圖: [<matplotlib.axes._subplots.AxesSubplot object at 0x0000022832C78808>
 <matplotlib.axes._subplots.AxesSubplot object at 0x0000022832D26F48>]

在這裡插入圖片描述