ggplot 在 Python 中的使用(plotnine)
阿新 • • 發佈:2020-09-06
# plotnine examples: bar chart, line chart, stacked bar chart, multi-line chart.
#
# NOTE(review): every plot below is a bare parenthesised expression, so this
# was presumably written to be evaluated cell by cell in a notebook -- confirm.

import pandas as pd
from plotnine import (
    aes,
    coord_flip,
    geom_bar,
    geom_line,
    geom_point,
    geom_text,
    ggplot,
    ggtitle,
    scale_x_date,
    theme,
    xlim,
    ylim,
)
from plotnine.data import economics

# ---------------------------------------------------------------------------
# Bar chart
# ---------------------------------------------------------------------------
median_age_dict = {
    'Country': [
        'New Zealand', 'Spain', 'Ireland', 'Israel', 'Denmark',
        'Norway', 'Netherlands', 'Australia', 'Italy', 'Sweden',
    ],
    'Age': [39.0, 37.0, 35.0, 34.0, 34.0, 34.0, 34.0, 34.0, 34.0, 34.0],
}
median_age = pd.DataFrame(median_age_dict)

(
    # `fill` colours the bars by country.
    ggplot(median_age, aes(x='Country', y='Age', fill='Country'))
    # Bar chart; `stat` must be given to say which statistic is drawn.
    + geom_bar(stat='identity', width=0.5)
    # Text labels; `nudge_y` is the offset applied to the label position.
    + geom_text(aes(x='Country', y='Age', label='Age'), nudge_y=2)
    # Swap the x and y axes.
    + coord_flip()
    # Order the bars by the order of the 'Country' column.
    # Use xlim(median_age['Country'][::-1]) for the reverse order.
    + xlim(median_age['Country'])
    # Hide the legend.
    + theme(legend_position='none')
    # Add a title.
    + ggtitle('Top 10 Median age of respondents from different countries')
)

# ---------------------------------------------------------------------------
# Line chart
# ---------------------------------------------------------------------------
# Keep only the rows dated after 2013-01-01 and renumber the index from 0.
save_rate = economics[economics['date'] > '2013-01-01']
save_rate = save_rate.reset_index(drop=True)

(
    ggplot(save_rate, aes(x='date', y='psavert'))
    + geom_line(color='blue')
    + geom_point(color='red')
    # Range of the y axis.
    + ylim(0, 6)
    # Change the x-axis tick spacing and the tick label format.
    + scale_x_date(breaks='5 months', date_labels='%Y-%m')
)

# ---------------------------------------------------------------------------
# Stacked bar chart
# ---------------------------------------------------------------------------
importance_dict = {
    'tool': [
        'Python', 'Python', 'Python',
        'R', 'R', 'R',
        'BigData', 'BigData', 'BigData',
        'SQL', 'SQL', 'SQL',
    ],
    'importance': [
        'Necessary', 'Nice to have', 'Unnecessary',
        'Nice to have', 'Necessary', 'Unnecessary',
        'Nice to have', 'Necessary', 'Unnecessary',
        'Nice to have', 'Necessary', 'Unnecessary',
    ],
    'mix': [
        0.6459935499875962, 0.32721409079632846, 0.026792359216075416,
        0.5139452332657201, 0.4148073022312373, 0.07124746450304259,
        0.5740647118301314, 0.3799292214357937, 0.04600606673407482,
        0.49177800616649536, 0.434224049331963, 0.07399794450154162,
    ],
}
Jobskillimpotance = pd.DataFrame(importance_dict)

# Original author's note: Jobskillimpotance cannot be drawn as multiple line
# charts, because geom_line requires the data points to be numeric coordinates.
(
    # Pass in the data source and the aesthetic mapping.
    ggplot(Jobskillimpotance, aes(x='tool', y='mix', fill='importance'))
    # The statistic is the raw data itself.
    + geom_bar(stat='identity')
)

# ---------------------------------------------------------------------------
# Multi-line chart
# ---------------------------------------------------------------------------
# NOTE(review): `df` is not defined anywhere in this snippet and must be
# supplied before this plot is evaluated. The mappings below reference the
# columns 'year', 'mix' and 'gender'; 'year' presumably holds dates, since
# scale_x_date is applied to it -- confirm against the real data.
(
    ggplot(df, aes(x='year', y='mix', fill='gender', color='gender'))
    + geom_line(size=1)
    # Add data markers.
    + geom_point(aes(shape='gender'), size=3)
    # Set the x-axis interval and the tick label format.
    + scale_x_date(breaks='5 years', date_labels='%Y')
    # Range of the y axis.
    + ylim(0, 6)
)