python第四次作業
阿新 • • 發佈:2018-10-15
print item 字典 src tin you range 目錄 word
# Word-frequency report for a Chinese text file, tokenised with jieba.
from collections import Counter


def count_long_words(words):
    """Count how often each token occurs, skipping one-character tokens.

    Args:
        words: Iterable of string tokens (e.g. the list returned by
            ``jieba.lcut``).

    Returns:
        A ``Counter`` mapping each kept token to its number of occurrences.
    """
    # Only tokens of length exactly 1 are dropped, matching the original
    # ``if len(word) == 1: continue`` filter.
    return Counter(word for word in words if len(word) != 1)


def rank_by_count(counts):
    """Return ``(word, count)`` pairs ordered from most to least frequent.

    Args:
        counts: Mapping of word to occurrence count.

    Returns:
        A list of ``(word, count)`` tuples; ties keep the mapping's
        iteration order because the sort is stable.
    """
    return sorted(counts.items(), key=lambda item: item[1], reverse=True)


def report_chinese_top_words(path="遇見.txt", top_n=7):
    """Print the full ranked word list of *path*, then its *top_n* entries.

    Args:
        path: UTF-8 encoded text file to analyse.
        top_n: How many of the most frequent words to print individually.
    """
    # Imported here so the script no longer depends on an import that is not
    # present in the visible file; harmless if jieba is also imported above.
    import jieba

    # ``with`` guarantees the file handle is closed (the original leaked it).
    with open(path, "r", encoding="utf-8") as source:
        text = source.read()

    ranked = rank_by_count(count_long_words(jieba.lcut(text)))
    print(ranked)
    # Slicing instead of indexing 0..6 avoids an IndexError when the text
    # contains fewer than ``top_n`` distinct words.
    for entry in ranked[:top_n]:
        print(entry)


if __name__ == "__main__":
    report_chinese_top_words()
# Word-frequency statistics for a UTF-8 encoded English text file.
from collections import Counter

# Punctuation characters removed from the text before it is split into words.
PUNCTUATION = ",.;!"
# Grammatical words (articles, pronouns, conjunctions) left out of the ranking.
STOP_WORDS = frozenset({"a", "the", "an", "and", "i", "or", "of"})


def strip_punctuation(text, punctuation=PUNCTUATION):
    """Return *text* with every character listed in *punctuation* removed.

    Args:
        text: The string to clean.
        punctuation: Characters to delete; defaults to ``PUNCTUATION``.

    Returns:
        The cleaned string.
    """
    # One translate pass replaces the chained per-character ``replace`` calls.
    return text.translate(str.maketrans("", "", punctuation))


def rank_words(words, stop_words=frozenset()):
    """Count *words* and return ``(word, count)`` pairs, most frequent first.

    Args:
        words: Iterable of word strings.
        stop_words: Words to leave out of the result entirely.

    Returns:
        A list of ``(word, count)`` tuples sorted by descending count; words
        with equal counts keep their order of first appearance.
    """
    counts = Counter(word for word in words if word not in stop_words)
    return counts.most_common()


def report_english_word_stats(path="because of you.txt", top_n=20):
    """Print each processing stage for *path*, ending with the top words.

    Args:
        path: UTF-8 encoded text file to analyse.
        top_n: How many of the most frequent non-stop-words to print.
    """
    # Read the file into a lower-cased string.
    with open(path, "r", encoding="utf-8") as source:
        text = source.read().lower()
    print(text)

    # Pre-processing: remove the punctuation characters.
    text = strip_punctuation(text)
    print(text)

    # Split into a list of words. ``split()`` with no argument splits on any
    # whitespace run; the original ``split('')`` raises ValueError.
    words = text.split()
    print(len(words), words)

    # Unique words as a set.
    unique_words = set(words)
    print(len(unique_words), unique_words)

    # Occurrence count per word, computed in one pass instead of calling
    # ``list.count`` once per unique word.
    word_counts = dict(Counter(words))
    print(len(word_counts), word_counts)

    # Dictionary items as a list, then the same pairs in descending order.
    pairs = list(word_counts.items())
    print(pairs)
    print(rank_words(words))

    # Exclude grammatical words that carry no meaning of their own.
    unique_words -= STOP_WORDS
    print(len(unique_words), unique_words)

    # Top-N output. The stop words are filtered out of the ranking itself
    # (the original removed them from the set only, so they still appeared
    # here), and slicing tolerates texts with fewer than ``top_n`` words.
    for pair in rank_words(words, STOP_WORDS)[:top_n]:
        print(pair)


if __name__ == "__main__":
    report_english_word_stats()
python第四次作業