製作簡單 '詞雲圖'
阿新 • • 發佈:2018-12-22
from collections import Counter

import jieba
from jieba.analyse import extract_tags
from wordcloud import WordCloud
import matplotlib.pyplot as plt

try:
    from scipy.misc import imread
except ImportError:
    # scipy.misc.imread was deprecated in SciPy 1.0 and removed in 1.2.
    # Fall back to matplotlib's reader, which this script already depends on.
    def imread(path):
        """Read an image file into a numpy array of unsigned bytes."""
        img = plt.imread(path)
        # matplotlib returns PNG data as floats in [0, 1]; WordCloud's mask
        # logic compares pixels against 255, so rescale to uint8.
        if img.dtype.kind == 'f':
            img = (img * 255).round().astype('uint8')
        return img


def generater(**kwargs):
    """Build, display and save a word cloud for a UTF-8 text file.

    Required keyword arguments:
        content_name: stem of the input text file (``./<content_name>.txt``);
            also used to name the output image.
        top_k: number of keywords to extract and the maximum number of
            words drawn in the cloud.
        bg_name: stem of the PNG mask image (``./<bg_name>.png``).
        color: background colour passed to ``WordCloud``.
        font_type: stem of the TrueType font file (``./<font_type>.ttf``).

    Side effects:
        Shows the cloud in a matplotlib window, then writes it to
        ``./<content_name>_word_cloud.jpg``.

    Raises:
        KeyError: if any of the required keyword arguments is missing.
    """
    content_name = kwargs['content_name']
    content_path = './{}.txt'.format(content_name)
    top_k = kwargs['top_k']
    bg_name = kwargs['bg_name']
    color = kwargs['color']
    font_type = kwargs['font_type']

    with open(content_path, mode='r', encoding='utf-8') as f:
        content = f.read()

    # Extract the top_k keywords ranked by TF-IDF.
    tags = extract_tags(sentence=content, topK=top_k)

    # Count how often each keyword occurs among the full-mode tokens.
    # A single Counter pass replaces one list.count() scan per keyword;
    # keywords absent from the token stream still map to 0.
    token_counts = Counter(jieba.cut(content, cut_all=True))
    words_freq = {tag: token_counts[tag] for tag in tags}

    # Load the background image; it is handed to WordCloud as the mask.
    bg_path = './{}.png'.format(bg_name)
    bg_img = imread(bg_path)

    font_path = './{}.ttf'.format(font_type)
    word_cloud = WordCloud(
        font_path=font_path,     # font used to render the words
        background_color=color,  # background colour
        max_words=top_k,         # maximum number of words shown
        max_font_size=100,       # largest font size
        mask=bg_img,             # background / mask image
    )
    word_cloud.generate_from_frequencies(words_freq)

    plt.imshow(word_cloud)
    plt.axis('off')  # hide the axes
    plt.show()

    # Save the rendered cloud next to the input file.
    word_cloud_img = './{}_word_cloud.jpg'.format(content_name)
    word_cloud.to_file(word_cloud_img)


if __name__ == '__main__':
    generater(
        content_name='習近平:在慶祝中國共產黨成立95週年大會上的講話',
        top_k=66,
        bg_name='china',  # PNG is assumed
        color='black',
        font_type='wryh',
    )
在 Windows 上安裝 wordcloud(使用預先編譯的 whl 檔):
pip install wordcloud-1.3.1-cp36-cp36m-win_amd64.whl