1. 程式人生 > >Python 清洗數據

Python 清洗數據

drop index pytho code 一行 dataframe cor 進行 rop

import numpy as np
import pandas as pd
from pandas import Series,DataFrame

# Build a Series with explicit string labels for the index.
s = Series([1, 2, 3], index=['a', 'b', 'c'])
print(s)
# Expected output:
# a    1
# b    2
# c    3
# dtype: int64
print(np.max(s))  # NumPy reductions can be applied directly to a Series

# Name the values and the index; both names are shown when printing.
s.name = 'rank'
s.index.name = 'name'
print(s)

# Create a DataFrame from a dict of equal-length lists.
sdata1 = {'name': ['a', 'b', 'c'], 'rank': [1, 2, 3], 'score': [98, 89, 54]}
print(sdata1)  # plain dict
df1 = DataFrame(sdata1)
print(df1)
# Expected output:
#   name  rank  score
# 0    a     1     98
# 1    b     2     89
# 2    c     3     54

# Passing `columns` selects/reorders columns by name; the data stays aligned,
# only the column order changes.
df2 = DataFrame(sdata1, columns=['score', 'name', 'rank'])
print(df2)
# Expected output:
#    score name  rank
# 0     98    a     1
# 1     89    b     2
# 2     54    c     3

# A requested column that is not in the dict ('class') is filled with NaN.
df3 = DataFrame(
    sdata1,
    columns=['score', 'name', 'rank', 'class'],
    index=[1, 2, 3],
)
print(df3)
# Expected output:
#    score name  rank class
# 1     98    a     1   NaN
# 2     89    b     2   NaN
# 3     54    c     3   NaN

# Reindexing with a label that does not exist (4) adds an all-NaN row.
df4 = df3.reindex([1, 2, 3, 4])
print(df4)
# Expected output:
#    score name  rank class
# 1   98.0    a   1.0   NaN
# 2   89.0    b   2.0   NaN
# 3   54.0    c   3.0   NaN
# 4    NaN  NaN   NaN   NaN

print(df4['score'])  # select a column by name
# `.ix` was removed in pandas 1.0; `.loc` does the label-based row lookup.
print(df4.loc[1])
print(df2[df2['score'] > 60])  # rows of df2 whose score is greater than 60
# Expected output:
#    score name  rank
# 0     98    a     1
# 1     89    b     2

del df3['class']  # delete the 'class' column in place
print(df3)

# Rebuild df3 from scratch and drop the 'class' column again.
sdata1 = {'name': ['a', 'b', 'c'], 'rank': [1, 2, 3], 'score': [98, 89, 54]}
print(sdata1)
df3 = DataFrame(
    sdata1,
    columns=['score', 'name', 'rank', 'class'],
    index=[1, 2, 3],
)
del df3['class']
print(df3)

print(df3.reindex([1, 2, 3, 4]))
print(df3.reindex([1, 2, 3, 4], fill_value=0))  # fill missing values with 0
# Expected output:
#    score name  rank
# 1     98    a     1
# 2     89    b     2
# 3     54    c     3
# 4      0    0     0

print(df3.reindex([0, 1, 2, 3]))
# Expected output:
#    score name  rank
# 0    NaN  NaN   NaN
# 1   98.0    a   1.0
# 2   89.0    b   2.0
# 3   54.0    c   3.0

# Backward fill: a missing label takes the values of the next existing label.
print(df3.reindex([0, 1, 2, 3], method='bfill'))
# Expected output:
#    score name  rank
# 0     98    a     1
# 1     98    a     1
# 2     89    b     2
# 3     54    c     3

print(df3.drop(1))  # drop the row labelled 1 (the first row here)
# Drop a column by name; axis=0 means rows, axis=1 means columns.
print(df3.drop('score', axis=1))
print(df3.T)  # transpose

Python 清洗數據