Python中Pandas的相關使用介紹(三)
阿新 • • 發佈:2019-01-03
本篇介紹以下幾個知識點:
(1)利用pandas讀取檔案
(2)利用concat合併檔案
(3)利用append合併檔案
(4)利用append合併DataFrame與Series
(5)利用merge合併DataFrame
(6)merge處理重疊區域,handle overlapping
(1)利用pandas讀取檔案
1.1 沒有指定檔案的路徑
import pandas as pd
import numpy as np
# read_csv handles plain .txt files as well as CSV files exported from Excel.
csv_path = 'zuobiao.csv'  # bare file name: no directory is given
data = pd.read_csv(csv_path)
print(data)
1.2 指定檔案的具體路徑
import pandas as pd
import numpy as np
# The full path to the file is spelled out; note that it is written with forward slashes.
txt_path = 'F:/Python/poem.txt'
data = pd.read_csv(txt_path)
print(data)
(2)利用pandas的concat合併檔案
2.1 concat的ignore_index引數
import numpy as np
import pandas as pd
# Three 3x4 frames that share the same column labels, filled with 0, 1 and 2.
labels = ['a', 'b', 'c', 'd']
df1 = pd.DataFrame(np.zeros((3, 4)), columns=labels)
df2 = pd.DataFrame(np.ones((3, 4)), columns=labels)
df3 = pd.DataFrame(np.full((3, 4), 2.0), columns=labels)
print(df1, "\n", df2, '\n', df3)

frames = [df1, df2, df3]

# Concatenate along axis=0 (rows stacked); axis=1 would merge horizontally.
res1 = pd.concat(frames, axis=0)
print(res1)

# ignore_index=True makes the row index of the result run sequentially.
res2 = pd.concat(frames, axis=0, ignore_index=True)
print(res2)
2.2 concat的join引數
import numpy as np
import pandas as pd
# Two frames whose row labels (1-3 vs 2-4) and column labels (a-d vs b-e) only partly overlap.
df1 = pd.DataFrame(np.zeros((3, 4)), columns=['a', 'b', 'c', 'd'], index=[1, 2, 3])
df2 = pd.DataFrame(np.ones((3, 4)), columns=['b', 'c', 'd', 'e'], index=[2, 3, 4])
print(df1)
print("\n" * 2)
print(df2)

# join='inner' is demonstrated here; swap in join='outer' to try the other mode.
res = pd.concat([df1, df2], join='inner', ignore_index=True)
print(res)
2.3 concat的join_axes引數(注意:join_axes已在pandas 1.0移除,新版pandas請改用reindex對齊索引)
import numpy as np
import pandas as pd
# Two frames whose row labels (1-3 vs 2-4) and column labels only partly overlap.
df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'], index=[1, 2, 3])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['b', 'c', 'd', 'e'], index=[2, 3, 4])

# Place the frames side by side while keeping exactly df1's row labels.
# The `join_axes` keyword that used to express this was removed from pd.concat
# in pandas 1.0, so df2 is aligned to df1.index first: rows of df2 that df1 does
# not have are dropped, and rows df2 lacks are filled with NaN.
# Without this alignment the result would cover the row labels of both frames.
res = pd.concat([df1, df2.reindex(df1.index)], axis=1)
print(df1)
print(df2)
print("\n" * 2)
print(res)
(3)利用pandas的append合併檔案
import numpy as np
import pandas as pd
# Two frames whose row labels and column labels only partly overlap.
df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'], index=[1, 2, 3])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['b', 'c', 'd', 'e'], index=[2, 3, 4])

# Attach df2's rows below df1's rows and renumber the rows sequentially.
# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement and
# gives the same result: columns missing from one frame are filled with NaN.
res = pd.concat([df1, df2], ignore_index=True)
print(df1)
print(df2)
print("\n" * 2)
print(res)
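append會把第二個DataFrame的列直接附加在第一個DataFrame的結尾,並回傳新的DataFrame;某一方沒有的欄位以NaN填補。(注意:DataFrame.append已在pandas 2.0移除,新版pandas請改用pd.concat。)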
(4)利用append合併DataFrame與Series
import numpy as np
import pandas as pd
# A 3x4 frame of zeros and a Series labelled with the same four column names.
df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])

# Add the Series to the frame as one extra row.
# DataFrame.append was removed in pandas 2.0. pd.concat needs the Series as a
# one-row frame, so it is converted with to_frame() and transposed; its index
# labels then line up with df1's columns.
res = pd.concat([df1, s1.to_frame().T], ignore_index=True)
print(df1)
print(s1)
print("\n" * 2)
print(res)
(5)利用merge合併DataFrame
# Two frames that both carry a 'key' column holding the same four values.
left = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})

# Join the frames on their shared 'key' column.
res = left.merge(right, on='key')
print(left)
print(right)
print("\n" * 2)
print(res)
import numpy as np
import pandas as pd
# Two frames that both carry a 'key' column holding the same four values.
left = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})

# Join the frames on their shared 'key' column.
res = left.merge(right, on='key')

# Disabled two-key variants kept from the original snippet.
# NOTE(review): they name 'key1'/'key2' columns, which the frames above do not
# define, so they cannot simply be switched on for this data.
# res = pd.merge(left,right,on = ['key1','key2'])  # only matching values are kept: intersection
# print(res)
# res = pd.merge(left,right,on = ['key1','key2'],how = 'outer')  # matching or not, both keys are combined: union; gaps become nan
# print(res)
# res = pd.merge(left,right,on = ['key1','key2'],how = 'right')  # based on right
# print(res)

print(left)
print(right)
print("\n" * 2)
print(res)
merge的indicator引數
import numpy as np
import pandas as pd
# Two small frames that share the 'col1' column; only the value 1 occurs in both.
df1 = pd.DataFrame({'col1': [0, 1], 'col_left': ['a', 'b']})
df2 = pd.DataFrame({'col1': [1, 2, 2], 'col_right': [2, 2, 2]})

# Outer merge on 'col1'. A string passed as `indicator` is used as the name of
# the extra column that records where each row came from; the original also
# mentions indicator=True as the alternative spelling.
res = df1.merge(df2, on='col1', how='outer', indicator='df')
print(df1)
print(df2)
print("\n" * 2)
print(res)
merged by index
import numpy as np
import pandas as pd
# Two frames with different columns but the same row labels K0-K2.
left = pd.DataFrame(
    {'A': ['A0', 'A1', 'A2'], 'B': ['B0', 'B1', 'B2']},
    index=['K0', 'K1', 'K2'],
)
right = pd.DataFrame(
    {'C': ['C0', 'C1', 'C2'], 'D': ['D0', 'D1', 'D2']},
    index=['K0', 'K1', 'K2'],
)

# Merge on the row index of both frames instead of on a column.
res = left.merge(right, left_index=True, right_index=True, how='outer')
# Alternative kept from the original:
# res = pd.merge(left,right,left_index = True,right_index = True,how = 'inner')
print(left)
print(right)
print("\n" * 2)
print(res)
(6)處理重疊區域,handle overlapping
import numpy as np
import pandas as pd
# Handle overlapping column names: both frames have an 'age' column.
boys = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'age': [1, 2, 3]})
girls = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'age': [4, 5, 6]})

# Both frames have 'k'; after the merge there is a single shared 'k' column.
# The suffixes tell the two 'age' columns apart. how='outer' is the
# alternative noted in the original.
res = boys.merge(girls, on='k', suffixes=['_boy', '_girl'], how='inner')
print(boys)
print(girls)
print("\n" * 2)
print(res)
附加本次學習的所有原始程式碼:
# The triple-quoted string below is a bare expression statement: it is never
# assigned or used, and only serves to switch off the earlier parts of the
# lesson (CSV loading and pickling, concat, append) while the merge example
# further down runs.
# NOTE(review): the disabled code calls DataFrame.append and passes join_axes to
# pd.concat; both appear to have been removed from recent pandas releases, so
# confirm against the installed version before re-enabling it.
# NOTE(review): the remark on the disabled ignore_index line labels axis 0 as
# horizontal, which contradicts the remark a few lines earlier (axis 1 is
# horizontal).
'''
import pandas as pd
import numpy as np
data = pd.read_csv('zuobiao.csv')
print(data)
data.to_pickle('student.pickle')
#合併DataFrame,concatenaing
df1 = pd.DataFrame(np.ones((3,4))*0,columns = ['a','b','c','d'])
df2 = pd.DataFrame(np.ones((3,4))*1,columns = ['a','b','c','d'])
df3 = pd.DataFrame(np.ones((3,4))*2,columns = ['a','b','c','d'])
#print(df1,"\n",df2,'\n',df3)
#上下合併
res = pd.concat([df1,df2,df3],axis = 0) #1是橫向
##print(res)
##
##res2 = pd.concat([df1,df2,df3],axis = 0,ignore_index = True) #0是橫向
##print(res2)
# join,['inner','outer']
##df1 = pd.DataFrame(np.ones((3,4))*0,columns = ['a','b','c','d'],index = [1,2,3])
##df2 = pd.DataFrame(np.ones((3,4))*1,columns = ['b','c','d','e'],index = [2,3,4])
##res = pd.concat([df1,df2],join = 'inner',ignore_index = True) #outer
##print(res)
## join_axes
##df1 = pd.DataFrame(np.ones((3,4))*0,columns = ['a','b','c','d'],index = [1,2,3])
##df2 = pd.DataFrame(np.ones((3,4))*1,columns = ['b','c','d','e'],index = [2,3,4])
##res = pd.concat([df1,df2],axis = 1,join_axes = [df1.index])##不要join_axes
##print(res)
##append
df1 = pd.DataFrame(np.ones((3,4))*0,columns = ['a','b','c','d'],index = [1,2,3])
df2 = pd.DataFrame(np.ones((3,4))*1,columns = ['b','c','d','e'],index = [2,3,4])
res = df1.append(df2,ignore_index = True)
print(res)
##DataFrame加Series
df1 = pd.DataFrame(np.ones((3,4))*0,columns = ['a','b','c','d'])
s1 = pd.Series([1,2,3,4],index = ['a','b','c','d'])
res = df1.append(s1,ignore_index = True)
print(res)
'''
# Merging frames with merge
import pandas as pd
import numpy as np
# The triple-quoted string below is a bare expression statement that is never
# assigned or used; it switches off the single-key merge example (two frames
# sharing a 'key' column, merged with on = 'key').
'''
left = pd.DataFrame({'key':['K0','K1','K2','K3'],
'A':['A0','A1','A2','A3'],
'B':['B0','B1','B2','B3']})
right = pd.DataFrame({'key':['K0','K1','K2','K3'],
'C':['C0','C1','C2','C3'],
'D':['D0','D1','D2','D3']})
print(left)
print(right)
print("\n"*3)
res = pd.merge(left,right,on = 'key')
print(res)
'''
##有兩個key
##left = pd.DataFrame({'key1':['K0','K1','K2','K3'],
## 'key2':['K0','K1','K0','K1'],
## 'A':['A0','A1','A2','A3'],
## 'B':['B0','B1','B2','B3']})
##
##right = pd.DataFrame({'key1':['K0','K1','K2','K3'],
## 'key2':['K0','K0','K0','K0'],
## 'C':['C0','C1','C2','C3'],
## 'D':['D0','D1','D2','D3']})
##
##print(left)
##print(right)
##print('\n'*3)
##res = pd.merge(left,right,on = ['key1','key2']) ##只考慮相同的值,交集
##print(res)
##res = pd.merge(left,right,on = ['key1','key2'],how = 'outer') ## union of the keys from both frames; unmatched cells are filled with nan
##print(res)
##res = pd.merge(left,right,on = ['key1','key2'],how = 'right') #基於right
##print(res)
###indicator引數
##df1 = pd.DataFrame({'col1':[0,1],'col_left':['a','b']})
##df2 = pd.DataFrame({'col1':[1,2,2],'col_right':[2,2,2]})
##print(df1)
##print(df2)
##print('\n'*3)
##
##res = pd.merge(df1,df2,on = 'col1',how = 'outer',indicator = 'df')#indicator = True
##print(res )
#merged by index
##left = pd.DataFrame({'A':['A0','A1','A2'],
## 'B':['B0','B1','B2']},
## index = ['K0','K1','K2'])
##right = pd.DataFrame({'C':['C0','C1','C2'],
## 'D':['D0','D1','D2']},
## index = ['K0','K1','K2'])
##
##print(left)
##print(right)
##print("\n"*3)
##res = pd.merge(left,right,left_index = True,right_index = True,how = 'outer')
###res = pd.merge(left,right,left_index = True,right_index = True,how = 'inner')
##print(res)
# Handle overlapping column names: both frames have an 'age' column.
boys = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'age': [1, 2, 3]})
girls = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'age': [4, 5, 6]})

# The suffixes tell the two 'age' columns apart in the merged frame.
# how='outer' is the alternative noted in the original.
res = boys.merge(girls, on='k', suffixes=['_boy', '_girl'], how='inner')
print(res)
# join functionality
# plotting with the plt module