1. 程式人生 > >部分程式碼2

部分程式碼2

原文連結https://blog.csdn.net/qq_36097393/article/details/83574269

import re
import time

from collections import Counter
# Start of the timed interval; CountWords() reports the time elapsed since
# this point. time.clock() was removed in Python 3.8, so the monotonic
# time.perf_counter() is used instead.
t0 = time.perf_counter()

#!/usr/bin/env python
#-*- coding:utf-8 -*-
#author: Enoch time:2018/11/1 0001


def CountWords(file_name, stopName):
    """Print the two most frequent words of a text file and the elapsed time.

    The text is lower-cased and split into words matching
    ``[a-z][a-z0-9]*``. Words listed in the stop-word file are dropped from
    the ranking. The ten most common remaining words are ordered by
    descending count (ties broken alphabetically) and the first two are
    printed as a table row with their share of *all* matched words (stop
    words included in the denominator). Finally the number of seconds
    elapsed since the module-level ``t0`` is printed.

    Args:
        file_name: Path of the text file to analyse.
        stopName: Path of a file with one stop word per line, or ``None``
            to keep every word.

    Returns:
        None. All results are written to standard output.

    Raises:
        OSError: If either file cannot be opened.
    """
    with open(file_name) as f:
        txt = f.read().lower()

    stop_words = set()
    if stopName is not None:
        with open(stopName) as f:
            # The text is lower-cased above, so stop words are normalised
            # the same way; strip() also drops stray spaces around an entry.
            stop_words = {line.strip().lower() for line in f}

    wordList = re.findall(r"[a-z][a-z0-9]*", txt)
    # Counted before stop-word removal: percentages are relative to every
    # matched word in the text.
    totalNum = len(wordList)

    counts = Counter(wordList)
    for word in stop_words:
        counts.pop(word, None)

    # Alphabetical sort first, then a stable sort by count, so that equal
    # counts end up in alphabetical order.
    ranked = sorted(counts.most_common(10), key=lambda item: item[0])
    ranked.sort(key=lambda item: item[1], reverse=True)

    t1 = time.perf_counter()
    for word, freq in ranked[:2]:
        print("|\t{:15}|{:<11.2%}|".format(word, freq / totalNum))
    print(t1 - t0)


# Only run the analysis when executed as a script, not on import.
if __name__ == "__main__":
    CountWords('../gone_with_the_wind.txt', '../stopwords.txt')