1. 程式人生 > >結巴分詞詞頻統計排序

結巴分詞詞頻統計排序

import jieba
import numpy as np
import pandas as pd

# Load every line of the input text file into a list of strings.
# The context manager closes the file handle as soon as the lines are read,
# instead of leaving it open for the rest of the run.
# NOTE(review): no encoding is passed, so the platform default is used --
# confirm that it matches the actual encoding of the file.
with open("D:/data.txt") as f:
    data = f.readlines()
def processs(data):
    """Segment each line of *data* into a list of tokens using jieba.

    Args:
        data: An iterable of strings (for example the lines read from a file).

    Returns:
        A list with one entry per input line; each entry is the list of
        tokens produced by ``jieba.cut`` for that line after newline
        characters have been stripped from both ends of the line.
    """
    # jieba.cut yields tokens lazily, so materialise each line's tokens.
    return [list(jieba.cut(line.strip("\n"))) for line in data]

# Tokenise the loaded lines: one token list per input line.
cut_words = processs(data)

# Flatten the per-line token lists into a single list of tokens.
total_words = [token for line_tokens in cut_words for token in line_tokens]

# np.unique with return_counts=True gives two parallel arrays:
# the distinct tokens and how many times each one occurs.
n = np.unique(total_words, return_counts=True)
unique_tokens, occurrences = n

# Index the counts by token, then order from most to least frequent.
s = pd.Series(index=unique_tokens, data=occurrences)
result = s.sort_values(ascending=False)
print(result)