# Python自然語言處理 第五章習題
import nltk
# Sample sentence in which "wind" appears twice.
s = 'They wind back the clock,while we chase after the wind'

# Break the raw string into word tokens.
s1 = nltk.word_tokenize(s)

# Attach a part-of-speech tag to every token; the result is a list.
s2 = nltk.pos_tag(s1)
# Dictionary practice: map words to their part-of-speech labels.
from collections import defaultdict

d = {'colorless': 'ADJ', 'idea': 'N', 'sleep': 'V'}  # create a dict
del d['sleep']  # delete an item from dict d
d['color'] = 'N'  # add a new item to dict d

# Reverse mapping from a tag to the list of words carrying it. Use
# collections.defaultdict directly rather than reaching for it through the
# nltk namespace, where it is only visible as a side effect of nltk's own
# imports.
d2 = defaultdict(list)
d2['N'].append('color')  # add a new word for type N
# 9. 驗證 go 和 went 在分佈上的限制，也就是說，它們不能自由互換。
from nltk.book import *
# Show the concordance of each verb form in text1, one after the other,
# so the contexts in which they occur can be compared.
for _word in ('go', 'went'):
    text1.concordance(_word)
from nltk.corpus import brown
# Tagged and untagged sentences from the "news" category of the Brown corpus.
brown_tagged_sents = brown.tagged_sents(categories='news')
brown_sents = brown.sents(categories='news')

# Train on the *tagged* sentences: the tagger learns from (word, tag) pairs,
# so the untagged sentences cannot be used as training data.
unigram_tagger = nltk.UnigramTagger(brown_tagged_sents)

# Tag one untagged sentence as a spot check.
unigram_tagger.tag(brown_sents[2007])

# Score the tagger against the same tagged sentences it was trained on.
unigram_tagger.evaluate(brown_tagged_sents)
import nltk
from nltk.corpus import brown
# Tagged and untagged sentences from the "news" category of the Brown corpus.
brown_sents = brown.sents(categories='news')
brown_tagged_sents = brown.tagged_sents(categories='news')

# Train an affix tagger on the tagged sentences.
affixtagger = nltk.AffixTagger(brown_tagged_sents)

# Tag one untagged sentence with the tagger that was just trained (the name
# must match the assignment above, otherwise this line raises NameError).
affixtagger.tag(brown_sents[2007])
import nltk
from nltk.corpus import brown
# Fetch the "news" category of the Brown corpus, with and without tags.
brown_tagged_sents = brown.tagged_sents(categories='news')
brown_sents = brown.sents(categories='news')

# Build a bigram tagger from the tagged sentences.
bigram_tagger = nltk.BigramTagger(train=brown_tagged_sents)

# Apply it to a single untagged sentence.
bigram_tagger.tag(brown_sents[2007])

# Score it against the very sentences it was trained on.
bigram_tagger.evaluate(brown_tagged_sents)
# Format the fields of a dict as "year-month-day".
d = {'year': 2016, 'month': 8, 'day': 15}

# The format string must be a quoted string literal; the parenthesized
# single-argument print works on both Python 2 and Python 3.
print('%s-%s-%s' % (d['year'], d['month'], d['day']))
# Sorted vocabulary (distinct word tokens) of the Brown "news" category.
# The corpus reader method is words(), not word().
sorted(set(brown.words(categories='news')))
# a. 哪些名詞經常以它們的複數形式出現，而不是它們的單數形式？