1. 程式人生 > python 檢測和過濾異常值

python 檢測和過濾異常值

import numpy as np
from pandas import Series, DataFrame

# --- Detecting and filtering outliers ---
# Seed NumPy's global random generator so the sample below is reproducible.
np.random.seed(12345)
# 1000 rows x 4 columns of standard-normal draws.
samples = np.random.randn(1000, 4)
data = DataFrame(samples)
summary = data.describe()
print(summary)
# Output recorded by the original author:
#                  0            1            2            3
# count  1000.000000  1000.000000  1000.000000  1000.000000
# mean     -0.067684     0.067924     0.025598    -0.002298
# std       0.998035     0.992106     1.006835     0.996794
# min      -3.428254    -3.548824    -3.184377    -3.745356
# 25%      -0.774890    -0.591841    -0.641675    -0.644144
# 50%      -0.116401     0.101143     0.002073    -0.013611
# 75%       0.616366     0.780282     0.680391     0.654328
# max       3.366626     2.653656     3.260383     3.927528

# Look at a single column and keep only the entries whose absolute value
# is greater than 3.
col = data[3]
beyond_three = col.abs() > 3
print(col[beyond_three])
# Output recorded by the original author (index, value):
# 97     3.927528
# 305   -3.399312
# 400   -3.745356
# Name: 3, dtype: float64
# Select every row that contains at least one value whose magnitude exceeds 3.
#
# Background on the built-ins the reduction is named after:
#   any(x) is True when at least one element of x is truthy; it is False when
#          x is empty or every element is falsy (0, '', False, None, ...).
#   all(x) is True when every element of x is truthy, or when x is empty.
#
# DataFrame.any(axis=1) applies the "any" reduction across the columns of each
# row, giving one boolean per row that is then used as a row mask.
# The axis is passed by keyword: pandas 2.0 made it keyword-only, so the
# positional form `.any(1)` raises TypeError on current pandas.
print(data[(np.abs(data) > 3).any(axis=1)])
# Output recorded by the original author:
#             0         1         2         3
# 5   -0.539741  0.476985  3.248944 -1.021228
# 97  -0.774363  0.552936  0.106061  3.927528
# 102 -0.655054 -0.565230  3.176873  0.959533
# 305 -2.315555  0.457246 -0.025907 -3.399312
# 324  0.050188  1.951312  3.260383  0.963301
# 400  0.146326  0.508391 -0.196713 -3.745356
# 499 -0.293333 -0.242459 -3.056990  1.918403
# 523 -3.428254 -0.296336 -0.439938 -0.867165
# 586  0.275144  1.179227 -3.184377  1.369891
# 808 -0.362528 -3.548824  1.553205 -2.186301
# 900  3.366626 -2.372214  0.851010  1.332846
# Step 2: cap the outliers in place so every value lies within [-3, 3].
# np.sign returns 1.0 for positive values, -1.0 for negative values and 0.0
# for zero, so np.sign(data) * 3 is a frame of +3 / -3 carrying the sign of
# each original value; only the cells flagged by the mask are overwritten.
outliers = data.abs() > 3
capped = np.sign(data) * 3
data[outliers] = capped
print(data.describe())
# Output recorded by the original author:
#                  0            1            2            3
# count  1000.000000  1000.000000  1000.000000  1000.000000
# mean     -0.067623     0.068473     0.025153    -0.002081
# std       0.995485     0.990253     1.003977     0.989736
# min      -3.000000    -3.000000    -3.000000    -3.000000
# 25%      -0.774890    -0.591841    -0.641675    -0.644144
# 50%      -0.116401     0.101143     0.002073    -0.013611
# 75%       0.616366     0.780282     0.680391     0.654328
# max       3.000000     2.653656     3.000000     3.000000