1. 程式人生 > >【詞雲】代碼

【詞雲】代碼

default rpo array ear strip() gray stop jieba pyplot

 1 import sys
 2 reload(sys)
 3 sys.setdefaultencoding(utf-8)
 4 
 5 from os import path
 6 from PIL import Image
 7 import numpy as np
 8 import matplotlib.pyplot as plt
 9 
10 from wordcloud import WordCloud
11 import jieba
12 d = path.dirname(__file__)
13 
14 stopWordFile = ustopwords.txt
15 stopWordList = [] 16 for L in open(path.join(d , stopWordFile)).readlines(): 17 stopWordList.append(L.strip().decode(utf-8)) 18 stopWordList = set(stopWordList) 19 20 new = words.txt 21 22 text = open(path.join( d , new )).read().strip(\r) 23 wordDict = {} 24 for w in jieba.cut(text):
25 if w not in stopWordList: 26 wordDict.setdefault(w , 0) 27 wordDict[w] += 1 28 29 maskImg = china.jpeg 30 alice_mask = np.array( Image.open(path.join(d , maskImg))) 31 32 wc = WordCloud(background_color = white, max_words = 2000 , mask = alice_mask , 33 font_path = path.join(d ,
msyh.ttf)) 34 wc.generate_from_frequencies(wordDict) 35 36 wc.to_file(path.join(d, "example.png")) 37 38 # show 39 plt.imshow(wc, interpolation=bilinear) 40 plt.axis("off") 41 plt.figure() 42 plt.imshow(alice_mask, cmap=plt.cm.gray, interpolation=bilinear) 43 plt.axis("off") 44 plt.show()

【詞雲】代碼