python-pandas的基本用法10
阿新 • • 發佈:2019-01-29
pandas的基本用法10-處理NaN
# -*- coding: utf-8 -*-
# Tutorial script: handling missing data (NaN / None) in pandas.
#
# Demonstrates, in order:
#   * which values a Series treats as null (np.nan and None),
#   * dropping missing data from a Series and from a DataFrame,
#   * filling missing data with a scalar or with per-column values.
#
# The numeric tables shown in the comments are sample output only; the
# DataFrame is built from np.random.randn, so values differ on every run.
# Every print call takes a single argument, so the script behaves the same
# on Python 2 and Python 3.
import numpy as np
from numpy import nan as NA
from pandas import Series, DataFrame

print('作為null處理的值')
s = Series(['aardvark', 'artichoke', np.nan, 'avocado'])
print(s)
print(s.isnull())
# 0    False
# 1    False
# 2     True
# 3    False

# None is treated as a missing value too, exactly like np.nan.
s[0] = None
print(s.isnull())
# 0     True
# 1    False
# 2     True
# 3    False

print(s.dropna())
# 1    artichoke
# 3      avocado

# Boolean indexing with notnull() selects the same rows as dropna().
print(s[s.notnull()])
# 1    artichoke
# 3      avocado

print('DataFrame對丟棄NA的處理')
data = DataFrame([[1., 6.5, 3.],
                  [1., NA, NA],
                  [NA, NA, NA],
                  [NA, 6.5, 3.]])
print(data)
#      0    1    2
# 0    1  6.5    3
# 1    1  NaN  NaN
# 2  NaN  NaN  NaN
# 3  NaN  6.5    3

# By default a row is dropped as soon as it contains any NA.
print(data.dropna())
#    0    1  2
# 0  1  6.5  3

# how='all' drops a row only when every value in it is NA.
print(data.dropna(axis=0, how='all'))
#      0    1    2
# 0    1  6.5    3
# 1    1  NaN  NaN
# 3  NaN  6.5    3

data = DataFrame(np.random.randn(7, 3))
# .ix was removed in pandas 1.0.  With the default integer index, the
# label-based .loc selects the same cells: row labels 0..4 (inclusive) of
# column 1, and row labels 0..2 (inclusive) of column 2.
data.loc[:4, 1] = NA
data.loc[:2, 2] = NA
print(data)
#           0         1         2
# 0  0.819602       NaN       NaN
# 1  0.513875       NaN       NaN
# 2  1.232815       NaN       NaN
# 3 -0.272040       NaN -0.202212
# 4 -0.485529       NaN -0.121475
# 5  0.054189  0.025241  1.031688
# 6 -1.729412 -0.975371 -2.013163

# thresh=2 keeps only the rows that have at least 2 non-NA values, so
# rows 0-2 (a single non-NA value each) are dropped.
print(data.dropna(thresh=2))
#           0         1         2
# 3 -0.743106       NaN -0.460403
# 4 -1.379843       NaN -0.495650
# 5  1.151642 -1.087282 -2.163735
# 6 -0.008196  0.674448 -0.650966

print('填充0')
# inplace=True modifies `data` itself instead of returning a new frame.
data.fillna(0, inplace=True)
print(data)
#           0         1         2
# 0 -0.392616  0.000000  0.000000
# 1 -1.069262  0.000000  0.000000
# 2 -0.751801  0.000000  0.000000
# 3 -0.899334  0.000000  1.185419
# 4  0.554094  0.000000  0.823630
# 5 -0.799200 -0.655324 -0.590763
# 6 -0.073688 -1.353579 -0.870911

print('不同行列填充不同的值')
# Re-introduce the missing values that the in-place fill just removed.
data.loc[:4, 1] = NA
data.loc[:2, 2] = NA
print(data)
#           0         1         2
# 0  1.289977       NaN       NaN
# 1  0.556263       NaN       NaN
# 2 -1.388250       NaN       NaN
# 3 -0.424846       NaN -1.120281
# 4 -0.885350       NaN  0.103914
# 5 -0.043046 -0.190838  2.351472
# 6  0.291554  0.837331  0.164307

# A dict maps column label -> fill value: column 1 gets 0.5 and column 2
# gets -1.  A new frame is returned; `data` itself is left unchanged.
print(data.fillna({1: 0.5, 2: -1}))