第八章 馬爾科夫鏈 - 將演講內容生成鏈長為100的馬爾科夫鏈句子
阿新 • • 發佈:2018-12-19
#!/usr/bin/env python
# _*_ coding:utf-8 _*_
"""Generate a 100-word Markov-chain sentence from an inauguration speech."""
from random import randint
from urllib.request import urlopen


def wordListSum(wordList):
    """Return the total of all occurrence counts in *wordList*.

    wordList maps a word to the number of times it was observed.
    """
    return sum(wordList.values())


def retrieveRandomWord(wordList):
    """Pick a word from *wordList* at random, weighted by its count.

    wordList maps candidate words to positive integer counts.
    Raises ValueError when the counts do not add up to a positive total.
    """
    total = wordListSum(wordList)
    if total <= 0:
        raise ValueError("wordList must contain at least one positive count")
    randomIndex = randint(1, total)
    for word, value in wordList.items():
        # Walk the cumulative counts; the word whose share of the range
        # contains randomIndex is the one that drives it to zero or below.
        randomIndex -= value
        if randomIndex <= 0:
            return word


def buildWordDict(text):
    """Build a two-level dict of word-transition counts from *text*.

    The result maps each word to a dict of {following word: count}.
    The punctuation marks , . ; : are treated as separate words.
    """
    # Strip newlines and quotation marks.
    text = text.replace("\n", " ")
    text = text.replace("\"", "")

    # Surround punctuation with spaces so each mark becomes its own token.
    punctuation = [',', '.', ';', ':']
    for symbol in punctuation:
        text = text.replace(symbol, " " + symbol + " ")

    words = [word for word in text.split(" ") if word != ""]

    wordDict = {}
    # Pair every word with its successor; the first word has no predecessor,
    # so pairing starts at (words[0], words[1]) rather than wrapping around.
    for previous, current in zip(words, words[1:]):
        followers = wordDict.setdefault(previous, {})
        followers[current] = followers.get(current, 0) + 1
    return wordDict


def generateChain(wordDict, startWord, length):
    """Return a space-separated chain of at most *length* words.

    Starts at *startWord* and repeatedly samples the next word from
    wordDict. Stops early when the current word has no recorded successor.
    """
    chainWords = []
    currentWord = startWord
    for _ in range(length):
        chainWords.append(currentWord)
        followers = wordDict.get(currentWord)
        if not followers:
            # Dead end (e.g. a word that only appears at the very end).
            break
        currentWord = retrieveRandomWord(followers)
    return " ".join(chainWords)


def main():
    """Download the speech, build the transition table, print a chain."""
    url = "https://pythonscraping.com/files/inaugurationSpeech.txt"
    with urlopen(url) as response:
        text = response.read().decode('utf-8')
    wordDict = buildWordDict(text)

    # Generate a Markov chain of length 100.
    length = 100
    print(generateChain(wordDict, "I", length))


if __name__ == "__main__":
    main()