程式人生 > python-pandas的基本用法11

python-pandas的基本用法11

pandas的基本用法11-層次化索引

# -*- coding: utf-8 -*- 

import numpy as np
from pandas import Series, DataFrame, MultiIndex

# --- Hierarchical (two-level) index on a Series ---------------------------
# print(...) with a single argument behaves the same on Python 2 and 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print('Series的層次索引')

# Passing a list of two equal-length lists as `index` builds a two-level
# index: the outer level is the letter, the inner level is the number.
data = Series([1, 3, 56, 2, 88, 32, 43, 12, 65, 90],
              index=[['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'],
                     [1, 2, 3, 1, 2, 3, 1, 2, 2, 3]])

print(data)
# a  1     1
#    2     3
#    3    56
# b  1     2
#    2    88
#    3    32
# c  1    43
#    2    12
# d  2    65
#    3    90
print(data.index)
# [a  1,    2,    3, b  1,    2,    3, c  1,    2, d  2,    3]

# First two rows by position; .iloc makes the positional intent explicit
# instead of relying on how [] interprets an integer slice.
print(data.iloc[:2])
# a  1    1
#    2    3

# unstack() pivots the inner index level into columns; combinations that
# do not exist in `data` (c/3 and d/1) show up as NaN.
print(data.unstack())
#     1   2   3
# a   1   3  56
# b   2  88  32
# c  43  12 NaN
# d NaN  65  90

# stack() is the inverse operation: columns go back into the inner level.
print(data.unstack().stack())
# a  1     1
#    2     3
#    3    56
# b  1     2
#    2    88
#    3    32
# c  1    43
#    2    12
# d  2    65
#    3    90

# --- Hierarchical index on both axes of a DataFrame -----------------------
print('DataFrame的層次索引')

# Both the row index and the column index are given as two parallel lists,
# so each axis gets a two-level index.
frame = DataFrame(np.arange(12).reshape((4, 3)),
                  index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
                  columns=[['Ohio', 'Ohio', 'Colorado'],
                           ['Green', 'Red', 'Green']])

print(frame)
#       Ohio       Colorado
#      Green  Red     Green
# a 1      0    1         2
#   2      3    4         5
# b 1      6    7         8
#   2      9   10        11

# Name the levels so they can be referred to by name later on.
frame.index.names = ['key1', 'key2']
frame.columns.names = ['state', 'color']
print(frame)
# state       Ohio       Colorado
# color      Green  Red     Green
# key1 key2
# a    1         0    1         2
#      2         3    4         5
# b    1         6    7         8
#      2         9   10        11

# `.ix` was removed from pandas; `.loc` with an explicit row tuple selects
# the same row without ambiguity between row and column labels.
print(frame.loc[('a', 1), :])
# state     color
# Ohio      Green    0
#           Red      1
# Colorado  Green    2

# Partial column key: only the outer column level is given, so the result
# is still indexed by the inner level (color).
print(frame.loc[('a', 2), :]['Colorado'])
# color
# Green    5

# Full row key and full column key address a single cell.
print(frame.loc[('a', 2), ('Ohio', 'Red')])
# 4
# --- Building a MultiIndex directly ----------------------------------------
print('直接用MultiIndex建立層次索引結構')

# from_arrays takes one list per level. The labels mirror the column index
# used for `frame` in this tutorial ('Gree' in the original was a typo for
# 'Green').
state_color_index = MultiIndex.from_arrays(
    [['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']],
    names=['state', 'color'])
print(state_color_index)

# --- Swapping index levels --------------------------------------------------
print('索引層級交換')

# swaplevel exchanges the two named row levels; as the sample output shows,
# the rows themselves stay in their original order.
frame_swapped = frame.swaplevel('key1', 'key2')
print(frame_swapped)
# state       Ohio       Colorado
# color      Green  Red     Green
# key2 key1
# 1    a         0    1         2
# 2    a         3    4         5
# 1    b         6    7         8
# 2    b         9   10        11

# Levels can also be given by position; swapping again restores the
# original key1/key2 order.
print(frame_swapped.swaplevel(0, 1))
# state       Ohio       Colorado
# color      Green  Red     Green
# key1 key2
# a    1         0    1         2
#      2         3    4         5
# b    1         6    7         8
#      2         9   10        11

# --- Sorting by an index level ----------------------------------------------
print('根據索引排序')

# DataFrame.sortlevel was removed from pandas; sort_index(level=...) is the
# replacement and accepts a level name or position.
print(frame.sort_index(level='key2'))
# state       Ohio       Colorado
# color      Green  Red     Green
# key1 key2
# a    1         0    1         2
# b    1         6    7         8
# a    2         3    4         5
# b    2         9   10        11

# Swap the levels first, then sort on the (new) outermost level.
print(frame.swaplevel(0, 1).sort_index(level=0))
# state       Ohio       Colorado
# color      Green  Red     Green
# key2 key1
# 1    a         0    1         2
#      b         6    7         8
# 2    a         3    4         5
#      b         9   10        11

# --- Aggregating by an index level ------------------------------------------
print('根據指定的key計算統計資訊')

# The `level` argument of DataFrame.sum was removed from pandas; grouping on
# the index level and summing each group gives the same per-key2 totals.
print(frame.groupby(level='key2').sum())
# state   Ohio       Colorado
# color  Green  Red     Green
# key2
# 1          6    8        10
# 2         12   14        16

# --- Building a hierarchical index from columns -----------------------------
print('使用列生成層次索引')

# Note: this rebinds `frame` to a new, flat DataFrame. The ranges are
# materialised as lists so the construction is identical on Python 2 and 3.
frame = DataFrame({'a': list(range(7)),
                   'b': list(range(7, 0, -1)),
                   'c': ['one', 'one', 'one', 'two', 'two', 'two', 'two'],
                   'd': [0, 1, 2, 0, 1, 2, 3]})
print(frame)
#    a  b    c  d
# 0  0  7  one  0
# 1  1  6  one  1
# 2  2  5  one  2
# 3  3  4  two  0
# 4  4  3  two  1
# 5  5  2  two  2
# 6  6  1  two  3

# Turn columns c and d into a two-level row index (they are removed from
# the columns by default).
print(frame.set_index(['c', 'd']))
#        a  b
# c   d
# one 0  0  7
#     1  1  6
#     2  2  5
# two 0  3  4
#     1  4  3
#     2  5  2
#     3  6  1

# drop=False keeps c and d as regular columns in addition to the index.
print(frame.set_index(['c', 'd'], drop=False))
# c   d
# one 0  0  7  one  0
#     1  1  6  one  1
#     2  2  5  one  2
# two 0  3  4  two  0
#     1  4  3  two  1
#     2  5  2  two  2
#     3  6  1  two  3

# reset_index is the inverse: the index levels become columns again and
# are placed in front of the remaining columns.
frame2 = frame.set_index(['c', 'd'])
print(frame2.reset_index())
#      c  d  a  b
# 0  one  0  0  7
# 1  one  1  1  6
# 2  one  2  2  5
# 3  two  0  3  4
# 4  two  1  4  3
# 5  two  2  5  2
# 6  two  3  6  1