利用Python3做詞頻統計和詞雲圖
阿新 • 發佈:2018-12-22
起源:
因看到一篇滿眼是字的文章,故希望能夠快速的檢索出關鍵字,所以嘗試用Python3來實現。
程式碼
# Word-frequency statistics and word-cloud rendering for a Chinese text file.
#
# Pipeline: read ljs.txt -> segment with jieba -> drop stopwords ->
# count frequencies with pandas -> render a word cloud via matplotlib.
import jieba
import numpy
import codecs
import pandas
import matplotlib.pyplot as plt
from wordcloud import WordCloud

# Read the source text. Specify the encoding explicitly instead of relying on
# the platform default, and let the context manager close the file.
# (assumes ljs.txt is UTF-8 — confirm against the actual file)
with codecs.open(r"ljs.txt", encoding="utf-8") as file:
    content = file.read()

# Segment into words, keeping only multi-character tokens: single characters
# and the '\r\n' artifacts from Windows line endings carry little meaning.
segment = [seg for seg in jieba.cut(content)
           if len(seg) > 1 and seg != '\r\n']

words_df = pandas.DataFrame({'segment': segment})

# Load the stopword list and filter it out.
# quoting=3 is csv.QUOTE_NONE: quote characters are kept as literal text.
stopwords = pandas.read_csv('stopword.txt', index_col=False, quoting=3,
                            sep=',', names=['stopword'], encoding="utf-8")
words_df = words_df[~words_df.segment.isin(stopwords.stopword)]

# Count word frequencies, most frequent first.
# NOTE: the original .agg({"計數": numpy.size}) dict-renaming form was
# deprecated in pandas 0.20 and removed in pandas 1.0; groupby().size()
# is the supported equivalent.
words_stat = (words_df.groupby('segment').size()
              .reset_index(name="計數")
              .sort_values(by=["計數"], ascending=False))

wordcloud = WordCloud(font_path='simhei.ttf', background_color='black')

# fit_words expects a {word: frequency} dict; other types raise an
# AttributeError about a missing .items attribute.
words_frequence = dict(zip(words_stat['segment'], words_stat["計數"]))
wordcloud = wordcloud.fit_words(words_frequence)

plt.imshow(wordcloud)
plt.show()