Python 清洗數據
阿新 • • 發佈:2018-07-17
drop index pytho code 一行 dataframe cor 進行 rop
# Tutorial script: basic pandas Series / DataFrame construction, re-indexing,
# selection and row/column removal. Every step prints its result.
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# --- Series -----------------------------------------------------------------
s = Series([1, 2, 3], index=['a', 'b', 'c'])
print(s)
# a    1
# b    2
# c    3
# dtype: int64

print(np.max(s))  # NumPy functions can be applied to a Series directly

# Name both the Series itself and its index.
s.name = 'rank'
s.index.name = 'name'
print(s)

# --- Building a DataFrame ---------------------------------------------------
sdata1 = {'name': ['a', 'b', 'c'], 'rank': [1, 2, 3], 'score': [98, 89, 54]}
print(sdata1)  # a plain dict of column name -> list of values

df1 = DataFrame(sdata1)
print(df1)
#   name  rank  score
# 0    a     1     98
# 1    b     2     89
# 2    c     3     54

# Data is matched to the requested column names; only the column order changes.
df2 = DataFrame(sdata1, columns=['score', 'name', 'rank'])
print(df2)
#    score name  rank
# 0     98    a     1
# 1     89    b     2
# 2     54    c     3

# 'class' is not a key of sdata1, so that column is filled with missing values.
df3 = DataFrame(
    sdata1,
    columns=['score', 'name', 'rank', 'class'],
    index=['1', '2', '3'],
)
print(df3)
#    score name  rank class
# 1     98    a     1   NaN
# 2     89    b     2   NaN
# 3     54    c     3   NaN

# --- Re-indexing and selection ----------------------------------------------
# Re-index with an extra label '4'; the new row is all NaN.
df4 = df3.reindex(['1', '2', '3', '4'])
print(df4)
#    score name  rank class
# 1   98.0    a   1.0   NaN
# 2   89.0    b   2.0   NaN
# 3   54.0    c   3.0   NaN
# 4    NaN  NaN   NaN   NaN

print(df4['score'])  # select one column
# Select one row by label. `.ix` was removed from pandas; `.loc` is the
# label-based indexer.
print(df4.loc['1'])

print(df2[df2['score'] > 60])  # rows of df2 whose score is greater than 60
#    score name  rank
# 0     98    a     1
# 1     89    b     2

del df3['class']
print(df3)  # the 'class' column has been removed

# --- Filling values while re-indexing ---------------------------------------
# Rebuild df3 from scratch (same data as above) and drop 'class' again.
sdata1 = {'name': ['a', 'b', 'c'], 'rank': [1, 2, 3], 'score': [98, 89, 54]}
print(sdata1)
df3 = DataFrame(
    sdata1,
    columns=['score', 'name', 'rank', 'class'],
    index=['1', '2', '3'],
)
del df3['class']
print(df3)

print(df3.reindex(['1', '2', '3', '4']))
print(df3.reindex(['1', '2', '3', '4'], fill_value=0))  # missing values become 0
#    score name  rank
# 1     98    a     1
# 2     89    b     2
# 3     54    c     3
# 4      0    0     0

print(df3.reindex(['0', '1', '2', '3']))
#    score name  rank
# 0    NaN  NaN   NaN
# 1   98.0    a   1.0
# 2   89.0    b   2.0
# 3   54.0    c   3.0

# Backward fill: the new label '0' takes the values of the next existing row.
print(df3.reindex(['0', '1', '2', '3'], method='bfill'))
#    score name  rank
# 0     98    a     1
# 1     98    a     1
# 2     89    b     2
# 3     54    c     3

# --- Dropping rows / columns and transposing --------------------------------
print(df3.drop('1'))  # drop the row labelled '1' (the first row)
print(df3.drop('score', axis=1))  # drop a column; axis 0 = rows, axis 1 = columns
print(df3.T)  # transpose
Python 清洗數據