1. 程式人生 > python-pandas的基本用法10

python-pandas的基本用法10

pandas的基本用法10-處理NaN（缺失值）

這裡寫圖片描述

# -*- coding: utf-8 -*- 
import numpy as np
from numpy import nan as NA
from pandas import Series,DataFrame

# Section: which values pandas treats as missing in a Series.
# Both ``np.nan`` and ``None`` are reported as null by ``isnull()``.
# ``print(...)`` with a single argument behaves the same on Python 2 and 3.
print('作為null處理的值')
s = Series(['aardvark', 'artichoke', np.nan, 'avocado'])
print(s)
print(s.isnull())
# Sample output:
# 0    False
# 1    False
# 2     True
# 3    False

# Assigning None also marks the element as missing.
s[0] = None
print(s.isnull())
# Sample output:
# 0     True
# 1    False
# 2     True
# 3    False

# dropna() returns a new Series without the missing entries.
print(s.dropna())
# Sample output:
# 1    artichoke
# 3      avocado

# Boolean indexing with notnull() selects the same entries as dropna().
print(s[s.notnull()])
# Sample output:
# 1    artichoke
# 3      avocado

# Section: dropping rows that contain missing values from a DataFrame.
# ``print(...)`` with a single argument behaves the same on Python 2 and 3.
print('DataFrame對丟棄NA的處理')
data = DataFrame([[1., 6.5, 3.], [1., NA, NA],
                  [NA, NA, NA], [NA, 6.5, 3.]])
print(data)
# Sample output (number formatting may differ between pandas versions):
#     0    1   2
# 0   1  6.5   3
# 1   1  NaN NaN
# 2 NaN  NaN NaN
# 3 NaN  6.5   3

# Default (how='any'): a row is dropped as soon as it contains one NA.
print(data.dropna())
# Sample output:
#    0    1  2
# 0  1  6.5  3

# how='all': a row is dropped only when every value in it is NA.
print(data.dropna(axis=0, how='all'))
# Sample output:
#     0    1   2
# 0   1  6.5   3
# 1   1  NaN NaN
# 3 NaN  6.5   3

# Section: dropna(thresh=...) on a random 7x3 frame with NA values injected.
data = DataFrame(np.random.randn(7, 3))
# ``.ix`` was removed in pandas 1.0; ``.loc`` is the label-based equivalent.
# Label slices are inclusive, so ``:4`` covers rows 0-4 and ``:2`` rows 0-2.
data.loc[:4, 1] = NA
data.loc[:2, 2] = NA
print(data)
# Sample output (values are random and differ on every run):
#           0         1         2
# 0  0.819602       NaN       NaN
# 1  0.513875       NaN       NaN
# 2  1.232815       NaN       NaN
# 3 -0.272040       NaN -0.202212
# 4 -0.485529       NaN -0.121475
# 5  0.054189  0.025241  1.031688
# 6 -1.729412 -0.975371 -2.013163

# thresh=2 keeps only rows that have at least 2 non-NA values.
# With 3 columns this means rows containing 2 or more NAs are dropped.
print(data.dropna(thresh=2))
# Sample output (from a different random run):
#           0         1         2
# 3 -0.743106       NaN -0.460403
# 4 -1.379843       NaN -0.495650
# 5  1.151642 -1.087282 -2.163735
# 6 -0.008196  0.674448 -0.650966

# Section: replace every missing value with 0.
# ``print(...)`` with a single argument behaves the same on Python 2 and 3.
print('填充0')
# inplace=True modifies ``data`` itself instead of returning a new frame.
data.fillna(0, inplace=True)
print(data)
# Sample output (values are random and differ on every run):
#           0         1         2
# 0 -0.392616  0.000000  0.000000
# 1 -1.069262  0.000000  0.000000
# 2 -0.751801  0.000000  0.000000
# 3 -0.899334  0.000000  1.185419
# 4  0.554094  0.000000  0.823630
# 5 -0.799200 -0.655324 -0.590763
# 6 -0.073688 -1.353579 -0.870911

# Section: fill missing values with a different value per column.
# ``print(...)`` with a single argument behaves the same on Python 2 and 3.
print('不同行列填充不同的值')
# Re-inject the NA values. ``.ix`` was removed in pandas 1.0; ``.loc`` is the
# label-based equivalent, and its label slices are inclusive.
data.loc[:4, 1] = NA
data.loc[:2, 2] = NA
print(data)
# Sample output (values are random and differ on every run):
#           0         1         2
# 0  1.289977       NaN       NaN
# 1  0.556263       NaN       NaN
# 2 -1.388250       NaN       NaN
# 3 -0.424846       NaN -1.120281
# 4 -0.885350       NaN  0.103914
# 5 -0.043046 -0.190838  2.351472
# 6  0.291554  0.837331  0.164307

# A dict maps column label -> fill value: column 1 gets 0.5, column 2 gets -1.
# Without inplace=True this returns a new frame and leaves ``data`` unchanged.
print(data.fillna({1: 0.5, 2: -1}))