1. 程式人生 > 其它 > Python讀取檔案並寫入ODPS

Python讀取檔案並寫入ODPS

前提:

  將本地 CSV 檔案用 pandas 讀取，進行資料 ETL，最後匯入到 ODPS 表中。

程式碼如下:

import pandas as pd
from tqdm import tqdm_notebook
from odps import ODPS
from odps import options
from odps.df import DataFrame


from odps.df import DataFrame
# 檢視相對路徑
%pwd
odps = ODPS('ODPS_ak', 'ODPS_pw', 'ODPS專案空間',
            endpoint='
http://service.cn-shanghai.maxcompute.aliyun.com/api', tunnel_endpoint="http://dt.cn-shanghai.maxcompute.aliyun.com")
# Load the source CSV into a pandas DataFrame.
# (Author's note: "07" has already been synced -- presumably the July file.)
data = pd.read_csv(
    '/home/linxz/datadir/202108.csv',
    encoding='utf-8',
)
# Summary statistics, used as a quick visual check that the file was read.
# The result is only displayed when this is the last expression of a
# notebook cell.
data.describe()
# Data ETL: select the columns to upload and clean the one free-text column.

# Columns copied through unchanged (first group).
a = data[[
    'company_id', 'ofr_id', 'bid_id', 'security_id',
    'bond_id', 'listed_market', 'short_name', 'time',
    'bid_price', 'bid_volume', 'ofr_price', 'ofr_volume',
    'bid_yield', 'bid_net_price', 'bid_flag_bargain', 'bid_flag_relation',
    'bid_exercise', 'bid_price_description', 'bid_quote_type',
    'ofr_yield', 'ofr_net_price', 'ofr_flag_bargain', 'ofr_flag_relation',
    'ofr_exercise',
]]

# Strip ASCII commas from the description column using a regex character class.
# NOTE: the replacement string is empty, so commas are removed outright rather
# than converted to full-width (Chinese) commas.
b = data[['ofr_price_description']]
b_new = b.replace('[,]', '', regex=True)

# Columns copied through unchanged (second group).
c = data[['ofr_quote_type', 'bid_ss_detect', 'ofr_ss_detect']]

# Reassemble the three pieces side by side; join() aligns on the shared row
# index, so the column order is a, then the cleaned description, then c.
data_join = a.join(b_new)
data_new = data_join.join(c)
data_new.head()
 
# Append a constant trailing column; it is the partition column named in the
# persist() call below.
data_new.loc[:, 'month_date'] = '202108'
data_new.head()


# Finally, load the prepared frame into the ODPS table.
# Global PyODPS settings are applied first: instance tunnel enabled, its
# limit switched off, and the connection timeout raised.
options.tunnel.limit_instance_tunnel = False
options.tunnel.use_instance_tunnel = True
options.connect_timeout = 200

# Wrap the pandas frame in a PyODPS DataFrame, print it, then persist it into
# the target table, partitioned by the month_date column.
odps_awake_model_result = DataFrame(data_new)
print(odps_awake_model_result)
odps_awake_model_result.persist(
    'ODPS專案空間.o_sumscope_bond_relation_detail_d',
    odps=odps,
    partitions=['month_date'],
)


# Export option 2: write to a local file instead.
# Uncomment to save the transformed frame as a CSV file:
# data_new.to_csv('/home/linxz/datadir/202101_new.csv')