python常用統計量的計算
阿新 • • 發佈:2019-01-08
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 27 09:19:25 2017

@author: yunjinqi

E-mail:[email protected]

Differentiate yourself in the world from anyone else.
"""
import pandas as pd
import scipy.stats as sts
import numpy as np


def _describe_stock(path, label):
    """Build a table of descriptive statistics for one stock workbook.

    Reads the Excel file at ``path`` and, for the columns at positions 1-3,
    collects the ``Series.describe()`` output plus skewness ('偏度') and
    kurtosis ('峰度'). Each per-column summary is printed before the
    skewness and kurtosis rows are appended.

    Args:
        path: Location of the Excel workbook to load.
        label: Prefix prepended to each column header to name the
            resulting columns.

    Returns:
        A DataFrame with one column per analysed input column and one row
        per statistic.
    """
    df = pd.read_excel(path)
    headers = list(df.columns)
    summaries = []
    for i in range(1, 4):
        # ``DataFrame.ix`` was removed in pandas 1.0; ``iloc`` is the
        # explicit positional selector for "all rows, i-th column".
        column = df.iloc[:, i]
        pe = column.describe()
        pe.name = label + headers[i]
        print(pe)
        scores = np.array(column)
        pe['偏度'] = sts.skew(scores)
        # scipy's default is Fisher (excess) kurtosis: a normal
        # distribution scores 0 here, not 3.
        pe['峰度'] = sts.kurtosis(scores)
        summaries.append(pe)
    return pd.DataFrame(summaries).T


# Basic statistics (mean, variance, skewness, kurtosis, ...) for
# Gezhouba (600068).
stock068 = _describe_stock('C:/Users/HXWD/Desktop/600068.xlsx', '葛洲壩')
print(stock068)

# Same statistics for Heilan Home (600398).
stock398 = _describe_stock('C:/Users/HXWD/Desktop/600398.xlsx', '海瀾之家')
print(stock398)

# Both tables share the same statistic-name index, so an index join puts
# the two stocks side by side.
data = stock068.join(stock398)
print(data)
data.to_csv('統計.csv')
#常用統計量的計算
#2018.01.16重新回來看峰度的計算,發現sts.kurtosis()、df.kurt()兩者的結果都與eviews的描述性統計結果不一致,原因是計算口徑不同:sts.kurtosis()預設fisher=True,回傳的是超額峰度(即峰度減3,常態分佈為0);df.kurt()回傳的則是經無偏修正的超額峰度;而eviews報告的是未減3的峰度(常態分佈為3)。借用一篇別人以純程式碼實作的公式,得到了和eviews一樣的結果,程式碼如下
# Recommended: use the code below to compute kurtosis (and skewness).
import math


def calc(data):
    """Return the mean, population standard deviation and raw third moment.

    Args:
        data: A non-empty sized collection of numbers. It is traversed
            more than once, so a one-shot iterator is not suitable.

    Returns:
        ``[niu, sigma, niu3]`` where ``niu`` is E(X), ``sigma`` is the
        population (divide-by-n) standard deviation and ``niu3`` is the
        raw third moment E(X^3).

    Raises:
        ZeroDivisionError: If ``data`` is empty.
    """
    n = len(data)
    niu = math.fsum(data) / n  # E(X)
    niu3 = math.fsum(a ** 3 for a in data) / n  # E(X^3)
    # Variance is taken from deviations about the mean. The shortcut
    # E(X^2) - E(X)^2 cancels catastrophically when the mean is large
    # relative to the spread and can even come out slightly negative,
    # which would make math.sqrt raise.
    variance = math.fsum((a - niu) ** 2 for a in data) / n
    sigma = math.sqrt(variance)
    return [niu, sigma, niu3]


def calc_stat(data):
    """Return the mean, standard deviation, skewness and kurtosis of data.

    Skewness and kurtosis are the standardized third and fourth central
    moments computed with divide-by-n moments. The kurtosis is NOT the
    excess kurtosis: 3 is not subtracted.

    Args:
        data: A non-empty sized collection of numbers. It is traversed
            more than once, so a one-shot iterator is not suitable.

    Returns:
        ``[niu, sigma, skew, kurt]``.

    Raises:
        ZeroDivisionError: If ``data`` is empty, or if all values are
            equal so that the standard deviation is zero.
    """
    niu, sigma, _ = calc(data)
    n = len(data)
    deviations = [a - niu for a in data]
    # Central moments are accumulated directly from the deviations; the
    # raw-moment expansion of the third moment loses most of its
    # significant digits when the mean dominates the spread.
    m3 = math.fsum(d ** 3 for d in deviations) / n
    m4 = math.fsum(d ** 4 for d in deviations) / n
    skew = m3 / sigma ** 3
    kurt = m4 / sigma ** 4
    return [niu, sigma, skew, kurt]