python 生成正態分佈資料,並繪圖和解析

阿新 • • 發佈：2020-12-22

1、生成正態分佈資料並繪製概率分佈圖

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


def normfun(x,mu,sigma):
  """Evaluate the normal (Gaussian) probability density at x.

  Args:
    x: Point(s) at which to evaluate the density; a scalar or a NumPy array.
    mu: Mean of the distribution.
    sigma: Standard deviation of the distribution.

  Returns:
    exp(-(x - mu)**2 / (2*sigma**2)) / (sigma * sqrt(2*pi)), with the same
    shape as x.
  """
  exponent = -((x - mu)**2)/(2*sigma**2)
  normaliser = sigma * np.sqrt(2*np.pi)
  return np.exp(exponent) / normaliser


# Draw 100 samples; np.random.normal takes (mean, standard deviation, count),
# so these come from a normal distribution with mean 15 and standard deviation 44.
# (Alternative input: np.random.randint(-65,80,size=100) -> min, max, count.)
result = np.random.normal(15,44,100)
print(result)

# Sample statistics used to parameterise the theoretical curve.
sample_mean = result.mean()
sample_std = result.std()
print(sample_mean,sample_std)

# x grid: from the smallest to the largest sample in steps of 0.1;
# y: the normal density for the sample mean/std evaluated on that grid.
x = np.arange(result.min(),result.max(),0.1)
y = normfun(x,sample_mean,sample_std)
plt.plot(x,y) # theoretical normal density curve

# Histogram of the actual samples: 10 bins, bars drawn at 80% of the bin
# width (rwidth=1 would leave no gaps), normalised to a density.
plt.hist(result,bins=10,rwidth=0.8,density=True)
plt.xlabel('temperature')
plt.ylabel('probability')
plt.title('distribution')
# The curve only spans the data's min..max, while the normal distribution
# extends to +/- infinity, so the plotted curve does not cover all of the
# probability mass.
plt.show()

根據範圍生成隨機整數（注意：randint 產生的是均勻分佈的整數，並非正態分佈）：

result = np.random.randint(-65,80,size=100) # 最小值,最大值(不含),數量

根據均值、標準差生成正態分佈：

result = np.random.normal(15,44,100) # 均值為15,標準差為44,數量為100

2、判斷一個序列是否符合正態分佈

import numpy as np
from scipy import stats


pts = 1000
np.random.seed(28041990) # fixed seed so every run draws the same samples
# Two normal samples of `pts` points each, both with standard deviation 1:
# one centred on 0, the other centred on 2.
a = np.random.normal(0,1,size=pts)
b = np.random.normal(2,1,size=pts)
# Joining the two samples gives a sequence that, in theory, is no longer
# normally distributed.
x = np.concatenate((a,b))
k2,p = stats.normaltest(x)
alpha = 1e-3
print("p = {:g}".format(p))


# Null hypothesis: x comes from a normal distribution.
# p below alpha -> reject it (x is not normal); otherwise it cannot be rejected.
verdict = "The null hypothesis can be rejected" if p < alpha else "The null hypothesis cannot be rejected"
print(verdict)

3、求置信區間、異常值

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import pandas as pd


def get_outer_data(data_list):
  """Return the outliers of a 1-D sequence using the 1.5 * IQR whisker rule.

  Args:
    data_list: Values to inspect; anything pd.DataFrame accepts as a single
      column of data.

  Returns:
    A DataFrame with one row per outlier and two columns: 'id' (the position
    of the value in data_list) and '異常值' (the outlier value itself).
    The frame is empty when no value lies outside the whiskers.
  """
  values = pd.DataFrame(data_list,columns=['value']).iloc[:,0]

  # Lower and upper quartiles.
  lower_q = values.quantile(q=0.25)
  upper_q = values.quantile(q=0.75)

  # Whiskers sit 1.5 interquartile ranges outside the quartiles.
  low_whisker = lower_q - 1.5 * (upper_q - lower_q)
  up_whisker = upper_q + 1.5 * (upper_q - lower_q)

  # Anything beyond either whisker counts as an outlier.
  beyond_whiskers = (values > up_whisker) | (values < low_whisker)
  outliers = values[beyond_whiskers]
  return pd.DataFrame({'id': outliers.index,'異常值': outliers})


N = 100
# Draw N samples from a normal distribution with mean 0 and standard deviation 1.
# The count must be the third argument: np.random.normal(0,N) would return a
# single scalar drawn with standard deviation N instead of an array of N samples.
result = np.random.normal(0,1,N)
# Alternative input: np.random.randint(-65,80,size=N) -> min, max, count.
mean,std = result.mean(),result.std(ddof=1) # sample mean and standard deviation (ddof=1)

# Interval with equal areas around the median that holds 90% of a normal
# distribution with the sample's mean and standard deviation.
conf_intveral = stats.norm.interval(0.9,loc=mean,scale=std)
print('置信區間:',conf_intveral)

x = np.arange(0,len(result),1) # sample positions 0..N-1 for the x axis

# Locate the outliers (1.5 * IQR rule); column 0 is the position, column 1 the value.
outer = get_outer_data(result)
print(outer,type(outer))
x1 = outer.iloc[:,0]
y1 = outer.iloc[:,1]
plt.scatter(x1,y1,marker='x',color='r') # outliers only
plt.scatter(x,result,marker='.',color='g') # every sample
# Horizontal lines across the whole plot at the lower and upper interval bounds.
plt.plot([0,len(result)],[conf_intveral[0],conf_intveral[0]])
plt.plot([0,len(result)],[conf_intveral[1],conf_intveral[1]])
plt.show()

python 生成正態分佈資料,並繪圖和解析

4、取樣點離散圖和概率圖

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import pandas as pd
import time


# Start timestamp. %d is the day of the month; the previous %D is not one of
# Python's documented strftime directives and its output is platform-dependent.
print(time.strftime('%Y-%m-%d %H:%M:%S'))


def _normfun(x,mu,sigma):
  """Evaluate the normal (Gaussian) probability density at x.

  Args:
    x: Point(s) at which to evaluate the density; a scalar or a NumPy array.
    mu: Mean of the distribution.
    sigma: Standard deviation of the distribution.

  Returns:
    The density value(s), with the same shape as x.
  """
  pdf = np.exp(-((x - mu)**2)/(2*sigma**2)) / (sigma * np.sqrt(2*np.pi))
  return pdf


def get_outer_data(data_list):
  """Return the outliers of a 1-D sequence using the 1.5 * IQR whisker rule.

  Args:
    data_list: Values to inspect; anything pd.DataFrame accepts as a single
      column of data.

  Returns:
    A DataFrame with one row per outlier and two columns: 'id' (the position
    of the value in data_list) and '異常值' (the outlier value itself).
  """
  df = pd.DataFrame(data_list,columns=['value'])
  df = df.iloc[:,0]
  # Lower and upper quartiles.
  Q1 = df.quantile(q=0.25)
  Q3 = df.quantile(q=0.75)

  # Whiskers sit 1.5 interquartile ranges outside the quartiles.
  low_whisker = Q1 - 1.5 * (Q3 - Q1)
  up_whisker = Q3 + 1.5 * (Q3 - Q1)

  # Anything beyond either whisker counts as an outlier.
  kk = df[(df > up_whisker) | (df < low_whisker)]
  data1 = pd.DataFrame({'id': kk.index,'異常值': kk})
  return data1


N = 100
# N samples from a normal distribution with mean 0 and standard deviation 1.
result = np.random.normal(0,1,N)
# Alternative inputs for experimenting:
#   np.random.randint(-65,80,size=N)  -> min, max, count
#   np.array([100]*100)               -> all values identical (std == 0 branches)
mean,std = result.mean(),result.std(ddof=1) # sample mean and standard deviation (ddof=1)
# Interval holding 90% of a normal distribution with the sample's mean/std.
if std == 0: # all values identical: no interval can be computed, so pad min/max by 1
  conf_intveral = [min(result)-1,max(result)+1]
else:
  conf_intveral = stats.norm.interval(0.9,loc=mean,scale=std)
# Outliers by the 1.5 * IQR rule; column 0 is the position, column 1 the value.
outer = get_outer_data(result)

# Upper subplot: scatter of the samples.
fig = plt.figure()
fig.add_subplot(2,1,1)
plt.subplots_adjust(hspace=0.3)
x = np.arange(0,len(result),1) # sample positions 0..N-1
plt.scatter(x,result,marker='.',color='g') # every sample
plt.scatter(outer.iloc[:,0],outer.iloc[:,1],marker='x',color='r') # outliers only
plt.plot([0,len(result)],[conf_intveral[0],conf_intveral[0]]) # lower interval bound
plt.plot([0,len(result)],[conf_intveral[1],conf_intveral[1]]) # upper interval bound
plt.text(0,conf_intveral[0],'{:.2f}'.format(conf_intveral[0])) # label the lower bound
plt.text(0,conf_intveral[1],'{:.2f}'.format(conf_intveral[1])) # label the upper bound
info = 'outer count:{}'.format(len(outer.iloc[:,0]))
plt.text(min(x),max(result)-((max(result)-min(result)) / 2),info) # outlier count at mid-height
plt.xlabel('sample count')
plt.ylabel('value')

# Lower subplot: density curve and histogram.
if std != 0: # the values are not all identical, so a density can be drawn
  fig.add_subplot(2,1,2)
  x = np.arange(min(result),max(result),0.1)
  y = _normfun(x,result.mean(),result.std())
  plt.plot(x,y) # theoretical normal density curve
  # 10 bins, bars at 80% of the bin width (rwidth=1 leaves no gaps), normalised to a density.
  plt.hist(result,bins=10,rwidth=0.8,density=True)
  # The third statistic is np.median, so it is labelled as the median.
  info = 'mean:{:.2f}\nstd:{:.2f}\nmedian:{:.2f}'.format(mean,std,np.median(result))
  plt.text(min(x),max(y) / 2,info)
  plt.xlabel('value')
  plt.ylabel('Probability')
else:
  # Zero spread: show the statistics as text only.
  fig.add_subplot(2,1,2)
  info = 'non-normal distribution!!\nmean:{:.2f}\nstd:{:.2f}\nmedian:{:.2f}'.format(mean,std,np.median(result))
  plt.text(0.5,0.5,info)
  plt.xlabel('value')
  plt.ylabel('Probability')
plt.savefig('./distribution.jpg') # save before show()
plt.show()

print(time.strftime('%Y-%m-%d %H:%M:%S')) # end timestamp

python 生成正態分佈資料,並繪圖和解析