1. 程式人生 > >python常用統計量的計算

python常用統計量的計算

# -*- coding: utf-8 -*-
"""
Created on Tue Jun 27 09:19:25 2017

@author: yunjinqi 
 
E-mail:[email protected] 
 
Differentiate yourself in the world from anyone else.
"""
import pandas as pd  
import scipy.stats as sts  
import numpy as np
def _describe_columns(frame, label, positions=range(1, 4)):
    """Build a table of descriptive statistics for selected columns of *frame*.

    For every positional column index in *positions* the pandas
    ``describe()`` summary is printed, then extended with two extra rows:
    '偏度' (skewness, ``scipy.stats.skew``) and '峰度' (kurtosis,
    ``scipy.stats.kurtosis`` with its default arguments).

    Args:
        frame: DataFrame holding the raw data; column labels must be strings
            because they are concatenated onto *label*.
        label: Prefix put in front of each column label in the result.
        positions: Positional (0-based) column indices to summarise.

    Returns:
        DataFrame with one column per summarised input column (named
        ``label + column label``) and one row per statistic.
    """
    column_names = list(frame.columns)
    summaries = []
    for pos in positions:
        # .iloc is purely positional; the removed .ix indexer fell back to
        # positional lookup here only because the column labels are strings.
        column = frame.iloc[:, pos]
        summary = column.describe()
        summary.name = label + column_names[pos]
        # Printed before skewness/kurtosis are appended, so the console shows
        # the plain describe() output.
        print(summary)
        values = np.array(column)
        summary['偏度'] = sts.skew(values)
        summary['峰度'] = sts.kurtosis(values)
        summaries.append(summary)
    # Each Series becomes a row; transpose so each input column is a column.
    return pd.DataFrame(summaries).T


# Basic descriptive statistics (mean, variance, skewness, kurtosis, ...)
# Gezhouba, data file 600068.xlsx
df = pd.read_excel('C:/Users/HXWD/Desktop/600068.xlsx')
stock068 = _describe_columns(df, '葛洲壩')
print(stock068)

# Heilan Home, data file 600398.xlsx
df = pd.read_excel('C:/Users/HXWD/Desktop/600398.xlsx')
stock398 = _describe_columns(df, '海瀾之家')
print(stock398)

# Both tables share the same statistic labels as their index, so an index
# join places the two stocks side by side.
data = stock068.join(stock398)
print(data)
data.to_csv('統計.csv')

#常用統計量的計算

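# 2018-01-16: Revisiting the kurtosis calculation, both sts.kurtosis() and
# df.kurt() give results that disagree with the descriptive statistics
# reported by EViews. The likely cause is a difference in definition:
# sts.kurtosis() defaults to Fisher's excess kurtosis (3 is subtracted), and
# df.kurt() returns an unbiased (sample-corrected) excess kurtosis, whereas
# the code below uses population moments without subtracting 3.
# The pure-Python code below, adapted from a formula written by someone else,
# reproduces the EViews results.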

import math
def calc(data):
    """Return ``[E(X), standard deviation, E(X^3)]`` for a sequence of numbers.

    The standard deviation is the population one (divides by ``n``), and the
    third entry is the raw (non-central) third moment.

    Args:
        data: Sized, re-iterable collection of numbers (it is traversed more
            than once).

    Returns:
        list: ``[niu, sigma, niu3]`` as described above.

    Raises:
        ZeroDivisionError: If *data* is empty.
    """
    n = len(data)
    niu = math.fsum(data) / n                      # E(X)
    niu3 = math.fsum(a ** 3 for a in data) / n     # E(X^3)
    # Variance from squared deviations rather than E(X^2) - E(X)^2: the
    # shortcut subtracts two nearly equal numbers, which loses precision and
    # can turn slightly negative from rounding, making sqrt() fail.
    variance = math.fsum((a - niu) ** 2 for a in data) / n
    sigma = math.sqrt(variance)                    # sqrt(D(X))
    return [niu, sigma, niu3]

def calc_stat(data):
    """Return ``[mean, standard deviation, skewness, kurtosis]`` of *data*.

    All quantities use population moments (division by ``n``). The kurtosis
    is Pearson's: the fourth central moment over ``sigma**4`` with nothing
    subtracted, so it is not an "excess" kurtosis.

    Args:
        data: Sized, re-iterable collection of numbers.

    Returns:
        list: ``[niu, sigma, skew, kurt]``.

    Raises:
        ZeroDivisionError: If *data* is empty, or if the standard deviation
            is exactly zero (skewness and kurtosis are then undefined).
    """
    niu, sigma, _ = calc(data)  # the raw third moment is not needed here
    n = len(data)
    # Central moments are accumulated from the deviations directly; deriving
    # the third one from raw moments (E(X^3) - 3*niu*sigma^2 - niu^3) gives
    # the same value on paper but cancels badly when the mean is large
    # compared with the spread.
    deviations = [a - niu for a in data]
    central3 = sum(d ** 3 for d in deviations) / n
    central4 = sum(d ** 4 for d in deviations) / n
    skew = central3 / (sigma ** 3)
    kurt = central4 / (sigma ** 4)
    return [niu, sigma, skew, kurt]
建議計算峰度的程式使用下面的程式碼