Python資料分析五:Series和DataFrame的索引方法(ix索引器)
阿新 • • 發佈:2019-02-04
# -*- coding: utf-8 -*-
"""Tour of index handling on pandas ``Series`` and ``DataFrame`` objects.

The script runs top to bottom and prints every intermediate result; the
expected output is recorded in the comment block that follows each ``print``.
Integer dtypes shown in that recorded output (``int64`` / ``int32``) are the
ones from the original capture and depend on the platform's default NumPy
integer type.

Label/position lookups use ``.loc`` / ``.iloc``: the ``.ix`` indexer that the
examples originally relied on was removed in pandas 1.0.
"""
import numpy as np
import pandas as pd
from pandas import DataFrame, Series

# ---------------------------------------------------------------------------
# Index objects
# ---------------------------------------------------------------------------
obj = Series(range(3), index=['a', 'b', 'c'])
print(obj)
# a    0
# b    1
# c    2
# dtype: int64

# Look at the index on its own.
index = obj.index
print(index[1])  # b

# An Index cannot be modified in place, which makes it safe to share between
# several data structures.  For illustration only:
#     index[1] = 'd'   ->  TypeError: Index does not support mutable operations
index = pd.Index(np.arange(4))
obj2 = Series([1, 2, 3, 5], index=index)
print(obj2.index is index)  # True in the original capture

# ---------------------------------------------------------------------------
# Testing whether a row label / column label exists
# ---------------------------------------------------------------------------
# Named ``nested`` rather than ``dict`` so the builtin is not shadowed.
nested = {
    'hhb': {'2000': 1.2, '2001': 1.3, '2003': 1.5},
    'zjx': {'2001': 1.2, '2003': 1.4},
}
data = DataFrame(nested)
print(data)
#       hhb  zjx
# 2000  1.2  NaN
# 2001  1.3  1.2
# 2003  1.5  1.4
print('hh' in data.columns)  # False
print('2003' in data.index)  # True

# ---------------------------------------------------------------------------
# Reindexing a Series
# ---------------------------------------------------------------------------
obj = Series([1, 3, 4, 5], index=['a', 'b', 'c', 'd'])
print(obj)
obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e'])
print(obj2)
# a    1.0
# b    3.0
# c    4.0
# d    5.0
# e    NaN

# Supply a default for labels that did not exist before.
obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e'], fill_value=0)
print(obj2)
# a    1
# b    3
# c    4
# d    5
# e    0

# Forward fill: each new label takes the value of the closest earlier label.
obj3 = Series(['blue', 'purple', 'yellow'], index=[0, 2, 4])
obj4 = obj3.reindex(range(6), method='ffill')
print(obj4)
# 0      blue
# 1      blue
# 2    purple
# 3    purple
# 4    yellow
# 5    yellow

# ---------------------------------------------------------------------------
# Reindexing a DataFrame
# ---------------------------------------------------------------------------
frame = DataFrame(
    np.arange(9).reshape((3, 3)),
    index=['a', 'b', 'c'],
    columns=['Ohio', 'Texas', 'California'],
)
print(frame)
#    Ohio  Texas  California
# a     0      1           2
# b     3      4           5
# c     6      7           8

# Rows: the new label 'd' is filled with NaN.
frame2 = frame.reindex(['a', 'b', 'c', 'd'])
print(frame2)
#    Ohio  Texas  California
# a   0.0    1.0         2.0
# b   3.0    4.0         5.0
# c   6.0    7.0         8.0
# d   NaN    NaN         NaN

# Columns: 'Texas' is dropped and the unknown 'Utah' appears as NaN.
states = ["Ohio", "Utah", "California"]
frame3 = frame.reindex(columns=states)
print(frame3)
#    Ohio  Utah  California
# a     0   NaN           2
# b     3   NaN           5
# c     6   NaN           8

# Rows and columns together, forward-filling the new row 'd' from row 'c'.
frame4 = frame3.reindex(index=['a', 'b', 'c', 'd'], method='ffill', columns=states)
print(frame4)
#    Ohio  Utah  California
# a     0   NaN           2
# b     3   NaN           5
# c     6   NaN           8
# d     6   NaN           8

# The original article also sketched a (commented-out) shorthand,
# ``frame.ix[['a', 'b', 'c', 'd'], states]``.  ``.ix`` no longer exists, so
# ``reindex`` as shown above is the way to introduce labels that are missing.

# ---------------------------------------------------------------------------
# Dropping entries from an axis
# ---------------------------------------------------------------------------
obj = Series(np.arange(5), index=['a', 'b', 'c', 'd', 'e'])
new_obj = obj.drop('c')
print(new_obj)
# a    0
# b    1
# d    3
# e    4
new_obj = obj.drop(['a', 'b'])
print(new_obj)
# c    2
# d    3
# e    4

# The same works on a DataFrame.
data = DataFrame(
    np.arange(16).reshape((4, 4)),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)
print(data)
#           one  two  three  four
# Ohio        0    1      2     3
# Colorado    4    5      6     7
# Utah        8    9     10    11
# New York   12   13     14    15
data2 = data.drop('New York')
print(data2)
#           one  two  three  four
# Ohio        0    1      2     3
# Colorado    4    5      6     7
# Utah        8    9     10    11

# axis=1 drops columns instead of rows.
data3 = data2.drop(['four', 'three'], axis=1)
print(data3)
#           one  two
# Ohio        0    1
# Colorado    4    5
# Utah        8    9

# ---------------------------------------------------------------------------
# Selecting and filtering: Series
# ---------------------------------------------------------------------------
obj = Series(np.arange(4), index=['a', 'b', 'c', 'd'])
print(obj)
# a    0
# b    1
# c    2
# d    3
print(obj['b'])  # 1
# Positional access goes through .iloc; a bare integer key on a Series with
# string labels is deprecated.
print(obj.iloc[1])  # 1
print(obj[['b', 'c']])
# b    1
# c    2
print(obj.iloc[1:3])
# b    1
# c    2
print(obj[obj < 2])
# a    0
# b    1
# Unlike positional slices, a label slice includes its end label.
print(obj['b':'c'])
# b    1
# c    2

# Assigning through a label slice.
obj['b':'c'] = 5
print(obj)
# a    0
# b    5
# c    5
# d    3

# ---------------------------------------------------------------------------
# Selecting and filtering: DataFrame
# ---------------------------------------------------------------------------
data = DataFrame(
    np.arange(16).reshape(4, 4),
    index=['Oh', 'Ny', 'CN', 'USA'],
    columns=['one', 'two', 'three', 'four'],
)
print(data)

# A list of labels selects columns.
print(data[['one', 'three']])
#      one  three
# Oh     0      2
# Ny     4      6
# CN     8     10
# USA   12     14

# A slice selects rows.
print(data[:2])
#     one  two  three  four
# Oh    0    1      2     3
# Ny    4    5      6     7

# A boolean Series selects the rows where it is True.
print(data[data['three'] > 5])
#      one  two  three  four
# Ny     4    5      6     7
# CN     8    9     10    11
# USA   12   13     14    15

# Comparing the whole frame yields a frame of booleans.
print(data > 5)
#        one    two  three   four
# Oh   False  False  False  False
# Ny   False  False   True   True
# CN    True   True   True   True
# USA   True   True   True   True

# Assigning through a boolean frame.
data[data < 5] = 0
print(data)
#      one  two  three  four
# Oh     0    0      0     0
# Ny     0    5      6     7
# CN     8    9     10    11
# USA   12   13     14    15

# ---------------------------------------------------------------------------
# Looking values up by label (.loc) and by position (.iloc)
# ---------------------------------------------------------------------------
data2 = data.loc['USA', ['one', 'two']]
print(data2)
# one    12
# two    13
# Name: USA, dtype: int32

# Row labels combined with column positions: translate the positions into
# labels via data.columns so that a single .loc call can be used.
data3 = data.loc[['USA', 'CN'], data.columns[[3, 2, 0]]]
print(data3)
#      four  three  one
# USA    15     14   12
# CN     11     10    8

# Third row, by position.
print(data.iloc[2])
# one       8
# two       9
# three    10
# four     11
# Name: CN, dtype: int32

# Label slice on rows (end label included), single column.
print(data.loc[:'CN', 'two'])
# Oh    0
# Ny    5
# CN    9
# Name: two, dtype: int32

# Boolean row mask combined with the first three columns.
print(data.loc[data.three > 5, data.columns[:3]])
#      one  two  three
# Ny     0    5      6
# CN     8    9     10
# USA   12   13     14