關於 pandas 的一些操作
阿新 • • 發佈:2018-12-08
1. 刪除操作
刪除csv檔案最後一行
import os
import numpy as np
import pandas as pd
def main(path='filename', encoding='gb2312'):
    """Remove the last data row of a CSV file and rewrite the file in place.

    Args:
        path: Location of the CSV file to read and overwrite.
        encoding: Text encoding used for both reading and writing.

    The resulting frame is printed after the file has been written. A file
    that contains only a header row is rewritten unchanged instead of
    raising an error.
    """
    df = pd.read_csv(path, encoding=encoding)
    # Positional slicing is safe on an empty frame, whereas looking up
    # df.index[[-1]] raises IndexError when there are no rows.
    df = df.iloc[:-1]
    df.to_csv(path, encoding=encoding, index=False)
    print(df)
2. 遍歷資料夾所有檔案
import os
import numpy as np
import pandas as pd
def main(root="path"):
    """Print the name of every file found under ``root``.

    Args:
        root: Directory at which the walk starts; subdirectories are
            visited as well. A directory that does not exist produces no
            output.

    Only the bare file name is printed, not the directory it was found in.
    """
    for _dirpath, _dirnames, files in os.walk(root):  # start walking at root
        for filename in files:
            print(filename)


if __name__ == '__main__':
    main()
3. pandas索取指定的行列
df.iat[-1,5] # single value at the last row, column position 5 (0-based, i.e. the sixth column)
df.iloc[:,7] # every row of column position 7 (0-based, i.e. the eighth column of df)
4. df補充列
import os
#import numpy as np
import pandas as pd
import time
# Maps a month number written as a string ('1' through '12') to its
# three-letter English abbreviation.
m = dict(zip(
    map(str, range(1, 13)),
    ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'),
))
def _parse_period(filename):
    """Return (month, year, previous_year) strings taken from a file name.

    The name is split on '--'; field 2 is used as the year and field 3 as
    the month. Anything after the first '.' in the month field is dropped,
    so a trailing extension such as '3.csv' yields '3'.
    """
    parts = filename.split('--')
    month = parts[3].split('.')[0]
    year = parts[2]
    return month, year, str(int(year) - 1)


def _final_columns(current, previous):
    """Column order written for files whose last name field is 'final.csv'."""
    return ['S.No.', 'Year', 'Month', 'COUNTRY', 'HSCode-金額表編碼', 'HSCode-數量表',
            previous + '-金額百萬美元', current + '-金額百萬美元',
            previous + '數量', current + '數量', 'Unit (1000‘)']


def _monthly_columns(current, previous):
    """Column order written for every file that is not a 'final.csv' file."""
    return ['S.No.', 'Year', 'Month', 'HSCode', 'Commodity', previous, current, '%Growth',
            'Jan-' + previous, 'Jan-' + current, '%Growth.1']


def main():
    """Add 'Year' and 'Month' columns to each CSV under india/imports/<year>/.

    For the years 2009 and 2011, every file found by walking the year
    directory is read with gb2312 encoding, given 'Year' and 'Month' columns
    parsed from its file name, reduced to a fixed column order, and written
    back over the same file. Files whose name ends in '--final.csv' are read
    as-is; all other files are read with their first line skipped.

    Raises:
        KeyError: if the month parsed from a file name is not a key of ``m``,
            or if an expected column is missing from the file.
    """
    for ie in ["imports"]:
        for year in ["2009", "2011"]:
            print(year)
            for roots, _dirs, files in os.walk('india/' + ie + '/' + year + '/'):
                for filename in files:
                    # Build the path from the directory os.walk is currently
                    # visiting; rebuilding it from the top-level directory
                    # points at the wrong place for files in subdirectories.
                    path = os.path.join(roots, filename)
                    is_final = filename.split('--')[-1] == 'final.csv'
                    if is_final:
                        df = pd.read_csv(path, encoding='gb2312')
                    else:
                        df = pd.read_csv(path, encoding='gb2312', skiprows=1)

                    month, y, y_1 = _parse_period(filename)
                    df["Month"] = month
                    df["Year"] = y

                    # Column headers in the files are labelled "<Mon> <year>".
                    current = m[month] + ' ' + y
                    previous = m[month] + ' ' + y_1
                    if is_final:
                        df = df[_final_columns(current, previous)]
                    else:
                        df = df[_monthly_columns(current, previous)]
                    df.to_csv(path, encoding='gb2312', index=False)
    print("ok!!!!!!!!!!!!!!!")


if __name__ == '__main__':
    main()