python-pandas的基本用法11
阿新 • • 發佈:2019-02-13
pandas的基本用法11-層次化索引
# -*- coding: utf-8 -*-
# Walkthrough of hierarchical (multi-level) indexing in pandas:
# building a MultiIndex on a Series and a DataFrame, selecting by level,
# swapping and sorting levels, aggregating per level, and moving columns
# into and out of the index.
#
# Every print() call takes a single argument, so the script produces the
# same output on Python 2 and Python 3.
import numpy as np
from pandas import Series, DataFrame, MultiIndex

print('Series的層次索引')
data = Series(
    [1, 3, 56, 2, 88, 32, 43, 12, 65, 90],
    index=[
        ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'],
        [1, 2, 3, 1, 2, 3, 1, 2, 2, 3],
    ],
)
print(data)
# a  1     1
#    2     3
#    3    56
# b  1     2
#    2    88
#    3    32
# c  1    43
#    2    12
# d  2    65
#    3    90
print(data.index)
# First two rows by position; .iloc makes the positional intent explicit.
print(data.iloc[:2])
# a  1    1
#    2    3

# unstack() pivots the inner level into columns; label combinations that
# do not exist in the Series (c/3 and d/1) become NaN.
print(data.unstack())
#       1     2     3
# a   1.0   3.0  56.0
# b   2.0  88.0  32.0
# c  43.0  12.0   NaN
# d   NaN  65.0  90.0

# stack() is the inverse of unstack(). Older pandas drops the NaN cells
# while stacking, newer implementations may keep them, so drop them
# explicitly to get the original ten rows back (as floats, because the
# NaN cells forced an upcast).
print(data.unstack().stack().dropna())

print('DataFrame的層次索引')
frame = DataFrame(
    np.arange(12).reshape((4, 3)),
    index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
    columns=[['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']],
)
print(frame)
#      Ohio     Colorado
#     Green Red    Green
# a 1     0   1        2
#   2     3   4        5
# b 1     6   7        8
#   2     9  10       11

# Name the levels so that later calls can refer to them by name.
frame.index = frame.index.set_names(['key1', 'key2'])
frame.columns = frame.columns.set_names(['state', 'color'])
print(frame)

# .ix no longer exists in pandas; .loc with a full row key does the same
# label-based lookup. The trailing ":" makes it unambiguous that the tuple
# addresses a row rather than a (row, column) pair.
print(frame.loc[('a', 1), :])
# state     color
# Ohio      Green    0
#           Red      1
# Colorado  Green    2
row_a2 = frame.loc[('a', 2), :]
print(row_a2['Colorado'])
# color
# Green    5
print(row_a2['Ohio']['Red'])
# 4

print('直接用MultiIndex建立層次索引結構')
# The second array previously contained the typo 'Gree', so the index
# built here did not match the column index of `frame` above.
state_color_index = MultiIndex.from_arrays(
    [['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']],
    names=['state', 'color'],
)
print(state_color_index)

print('索引層級交換')
frame_swapped = frame.swaplevel('key1', 'key2')
print(frame_swapped)
# state      Ohio     Colorado
# color     Green Red    Green
# key2 key1
# 1    a        0   1        2
# 2    a        3   4        5
# 1    b        6   7        8
# 2    b        9  10       11

# Levels can also be addressed by position; this swaps them back.
print(frame_swapped.swaplevel(0, 1))

print('根據索引排序')
# sortlevel() was removed from pandas; sort_index(level=...) replaces it.
sorted_by_key2 = frame.sort_index(level='key2')
print(sorted_by_key2)
# state      Ohio     Colorado
# color     Green Red    Green
# key1 key2
# a    1        0   1        2
# b    1        6   7        8
# a    2        3   4        5
# b    2        9  10       11
print(frame.swaplevel(0, 1).sort_index(level=0))
# state      Ohio     Colorado
# color     Green Red    Green
# key2 key1
# 1    a        0   1        2
#      b        6   7        8
# 2    a        3   4        5
#      b        9  10       11

print('根據指定的key計算統計資訊')
# sum(level=...) was removed from pandas; group on the level instead.
key2_totals = frame.groupby(level='key2').sum()
print(key2_totals)
# state  Ohio     Colorado
# color Green Red    Green
# key2
# 1         6   8       10
# 2        12  14       16

print('使用列生成層次索引')
# NOTE: `frame` is rebound here to a new, flat DataFrame.
frame = DataFrame({
    'a': list(range(7)),
    'b': list(range(7, 0, -1)),
    'c': ['one', 'one', 'one', 'two', 'two', 'two', 'two'],
    'd': [0, 1, 2, 0, 1, 2, 3],
})
print(frame)
#    a  b    c  d
# 0  0  7  one  0
# 1  1  6  one  1
# 2  2  5  one  2
# 3  3  4  two  0
# 4  4  3  two  1
# 5  5  2  two  2
# 6  6  1  two  3

# Move columns c and d into the row index.
print(frame.set_index(['c', 'd']))
#        a  b
# c   d
# one 0  0  7
#     1  1  6
#     2  2  5
# two 0  3  4
#     1  4  3
#     2  5  2
#     3  6  1

# drop=False keeps c and d as ordinary columns as well.
print(frame.set_index(['c', 'd'], drop=False))

frame2 = frame.set_index(['c', 'd'])
# reset_index() undoes set_index(): the index levels become the leading
# columns again.
print(frame2.reset_index())
#      c  d  a  b
# 0  one  0  0  7
# 1  one  1  1  6
# 2  one  2  2  5
# 3  two  0  3  4
# 4  two  1  4  3
# 5  two  2  5  2
# 6  two  3  6  1