1. 程式人生 > >4染發放g

4染發放g

次數 spl utf ace 字典 全部 replace HA tlist

# 1. Read the entire input file into one string.
# The context manager closes the file even if read() raises.
with open('the.txt', mode='r', encoding='utf-8') as theFile:
    theText = theFile.read()
print(theText)

# 2. Replace every punctuation mark (and newline) with a space, then
# normalise whitespace.
replaceList = [',', '.', "'", '\n']
# One translate() pass maps every listed character to a space.
theText = theText.translate(str.maketrans(dict.fromkeys(replaceList, ' ')))
# A single replace('  ', ' ') only halves runs of spaces (".\n\n" would still
# leave a double space); split/join collapses runs of any length.
theText = ' '.join(theText.split())
print(theText)

# 3. Break the text into the sequence of words, in order of appearance.
# split() with no argument splits on runs of whitespace and never yields
# empty strings, unlike split(' '), which would count '' as a word.
theList = theText.split()
print(theList)

# 4. Collect the distinct words and count how often each one occurs.
from collections import Counter

theSet = set(theList)  # the distinct words
print(theSet)

# Counter tallies all words in a single pass; calling theList.count() once
# per distinct word rescans the whole list each time.  Converting back to a
# plain dict keeps the printed form as {...}; keys follow first appearance,
# so the output is reproducible from run to run.
theDict = dict(Counter(theList))
print(theDict)


# 5. Sort
# Materialise the (word, count) pairs as a list so they can be ordered.
wordCountList = [*theDict.items()]
print(wordCountList)
# Order by count, highest first.
wordCountList = sorted(wordCountList, key=lambda entry: entry[1], reverse=True)
print(wordCountList)

# 6. Print the top 20 most frequent words.
# NOTE(review): this step sat behind an opening triple quote that was never
# closed, which is a syntax error; it is enabled here — confirm that is intended.
# Slicing (rather than indexing over range(20)) avoids an IndexError when the
# text contains fewer than 20 distinct words.
for entry in wordCountList[:20]:
    print(entry)

  

4染發放g