視覺化matplotlib
阿新 • • 發佈:2018-11-10
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Matplotlib walkthrough: line, bar, scatter and histogram plots.

Reads ``unrate.csv`` (uses its ``DATE`` and ``VALUE`` columns) and
``fandango_scores.csv`` from the current working directory and displays
each figure in turn with ``plt.show()``.
"""
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from numpy import arange

# ---------------------------------------------------------------------------
# Line plots: unemployment rate
# ---------------------------------------------------------------------------
unrate = pd.read_csv('unrate.csv')
# Convert the DATE column to datetime so it can be plotted and so that the
# `.dt` accessor used further down is available.
unrate['DATE'] = pd.to_datetime(unrate['DATE'])
print(unrate.head(12))  # first 12 rows

# plt.plot() draws onto the current figure; plt.show() displays it.
first_twelve = unrate[0:12]
plt.plot(first_twelve['DATE'], first_twelve['VALUE'])  # x values, y values
plt.xticks(rotation=45)  # rotate the x tick labels by 45 degrees
plt.xlabel('Month')  # axis labels
plt.ylabel('Unemployment Rate')
plt.title('Monthly Unemployment Trends,1948')  # figure title
plt.show()

# ---------------------------------------------------------------------------
# Subplots
# ---------------------------------------------------------------------------
fig = plt.figure(figsize=(3, 3))  # figsize sets the figure size
ax1 = fig.add_subplot(2, 1, 1)
ax2 = fig.add_subplot(2, 1, 2)
# NOTE(review): this axes uses a 4x3 grid on the same figure as the 2x1 axes
# above and nothing is drawn on it; kept so the rendered figure is unchanged.
ax3 = fig.add_subplot(4, 3, 6)
ax1.plot(np.random.randint(1, 5, 5), np.arange(5))  # random x values
ax2.plot(np.arange(10) * 3, np.arange(10))
plt.show()

# ---------------------------------------------------------------------------
# Two curves in one figure
# ---------------------------------------------------------------------------
unrate['MONTH'] = unrate['DATE'].dt.month
fig = plt.figure(figsize=(6, 3))
plt.plot(unrate[0:12]['MONTH'], unrate[0:12]['VALUE'], c='red')
plt.plot(unrate[12:24]['MONTH'], unrate[12:24]['VALUE'], c='blue')
plt.show()

# ---------------------------------------------------------------------------
# Several curves, one colour per 12-row slice
# ---------------------------------------------------------------------------
fig = plt.figure(figsize=(10, 6))
colors = ['red', 'blue', 'green', 'orange', 'black']
for i in range(5):
    start_index = i * 12
    end_index = (i + 1) * 12
    subset = unrate[start_index:end_index]
    label = str(1948 + i)
    plt.plot(subset['MONTH'], subset['VALUE'], c=colors[i], label=label)
plt.legend(loc='upper left')  # legend built from the `label` of each line
plt.xlabel('Month,Integer')
plt.ylabel('Unemployment Rate,Percent')
plt.title('Monthly Unemployment Trends,1948-1952')
plt.show()

# ---------------------------------------------------------------------------
# Bar chart: ratings of the first film
# ---------------------------------------------------------------------------
reviews = pd.read_csv('fandango_scores.csv')
# NOTE(review): 'Metacritic_user_nom' is spelled with 'nom' throughout;
# presumably this matches the CSV header -- verify against the data file.
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm',
        'Fandango_Ratingvalue', 'Fandango_Stars']
norm_reviews = reviews[cols]
print(norm_reviews[:1])  # the first film's row

# Rating columns, one per source.
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm',
            'Fandango_Ratingvalue', 'Fandango_Stars']
# `.ix` was removed in pandas 1.0; `.loc` selects the row labelled 0, which
# is the first row under the default integer index produced by read_csv.
bar_heights = norm_reviews.loc[0, num_cols].values  # bar heights
print(bar_heights)
bar_positions = arange(5) + 0.75  # x position of each bar
print(bar_positions)

# `ax` does the drawing; `fig` controls the figure as a whole.
fig, ax = plt.subplots()
ax.bar(bar_positions, bar_heights, 0.3)  # 0.3 is the bar width
# Pin the ticks to the bar positions first; otherwise the labels below are
# attached to whatever ticks the default locator chose and do not line up
# with the bars.
ax.set_xticks(bar_positions)
ax.set_xticklabels(num_cols, rotation=45)  # tilt the x labels by 45 degrees
ax.set_xlabel('Rating Source')
ax.set_ylabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()

# ---------------------------------------------------------------------------
# Scatter plots
# ---------------------------------------------------------------------------
fig, ax = plt.subplots()
# One rating source per axis.
ax.scatter(norm_reviews['Fandango_Ratingvalue'], norm_reviews['RT_user_norm'])
ax.set_xlabel('Fandango')
ax.set_ylabel('Rotten Tomatoes')  # was misspelled 'Rottten Tomatoes'
plt.show()

fig = plt.figure(figsize=(5, 10))
ax1 = fig.add_subplot(2, 1, 1)
ax1.scatter(norm_reviews['Fandango_Ratingvalue'], norm_reviews['RT_user_norm'])
ax1.set_xlabel('Fandango')
ax1.set_ylabel('Rotten Tomatoes')
plt.show()

# ---------------------------------------------------------------------------
# Histograms
# ---------------------------------------------------------------------------
reviews = pd.read_csv('fandango_scores.csv')
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm',
        'Fandango_Ratingvalue']
norm_reviews = reviews[cols]
print(norm_reviews[:5])  # first five rows

# Frequency of each distinct rating value, ordered by the rating value.
fandango_distribution = norm_reviews['Fandango_Ratingvalue'].value_counts()
fandango_distribution = fandango_distribution.sort_index()
imdb_distribution = norm_reviews['IMDB_norm'].value_counts()
imdb_distribution = imdb_distribution.sort_index()
print(fandango_distribution)
print(imdb_distribution)

fig, ax = plt.subplots()
# NOTE(review): the three calls below all draw onto the same axes, so the
# histograms are overlaid; they look like alternative invocations -- confirm
# whether only one was meant to be active.
ax.hist(norm_reviews['Fandango_Ratingvalue'])
ax.hist(norm_reviews['Fandango_Ratingvalue'], bins=20)  # bins: number of bins
ax.hist(norm_reviews['Fandango_Ratingvalue'], range=(4, 5), bins=20)  # range: lower/upper edge of the bins
plt.show()

# Four stacked histograms sharing the same x range and y limits.
fig = plt.figure(figsize=(5, 20))
ax1 = fig.add_subplot(4, 1, 1)
ax2 = fig.add_subplot(4, 1, 2)
ax3 = fig.add_subplot(4, 1, 3)
ax4 = fig.add_subplot(4, 1, 4)

ax1.hist(norm_reviews['Fandango_Ratingvalue'], bins=20, range=(0, 5))
ax1.set_title('Distribution of Fandango Ratings')
ax1.set_ylim(0, 50)  # set_ylim: fix the y-axis limits

ax2.hist(norm_reviews['RT_user_norm'], 20, range=(0, 5))
ax2.set_title('Distribution of Rotten Tomatoes Ratings')
ax2.set_ylim(0, 50)

ax3.hist(norm_reviews['Metacritic_user_nom'], 20, range=(0, 5))
ax3.set_title('Distribution of Metacritic Ratings')
ax3.set_ylim(0, 50)

ax4.hist(norm_reviews['IMDB_norm'], 20, range=(0, 5))
ax4.set_title('Distribution of IMDB Ratings')
ax4.set_ylim(0, 50)
plt.show()