1. 程式人生 > jieba分詞python建立倒排索引

jieba分詞python建立倒排索引

# encoding=utf-8
import json
import jieba
from sys import argv
from collections import defaultdict

# Build an inverted index (word -> list of record numbers) over the 'title'
# field of a JSON-lines file, then answer keyword lookups interactively.
#
# Usage: <script> <path>, where every non-blank line of <path> is one JSON
# object that has a 'title' key.
if len(argv) < 2:
    raise SystemExit('usage: %s <json-lines-file>' % argv[0])
path = argv[1]

# Read raw bytes so loading does not depend on the locale's default text
# encoding; json.loads decodes the UTF-8 itself. A real list (not a lazy
# map object) keeps objs[i] valid for the lookups below, and the with-block
# closes the file as soon as loading is done. Blank lines are skipped so a
# trailing newline does not abort the load.
with open(path, 'rb') as src:
    objs = [json.loads(line) for line in src if line.strip()]

res = defaultdict(list)
for idx, obj in enumerate(objs):
    # set() drops repeated tokens so each record number is listed at most
    # once per word.
    for word in set(jieba.cut_for_search(obj['title'])):
        res[word].append(idx)

# raw_input only exists on Python 2; fall back to input where it is missing.
try:
    read_line = raw_input
except NameError:
    read_line = input

while True:
    try:
        key = read_line('請輸入查詢關鍵詞:')
        # Index keys are text; decode only when the console handed us bytes.
        if isinstance(key, bytes):
            key = key.decode('utf-8')
        # .get() avoids inserting an empty posting list for unknown words.
        for i in res.get(key, []):
            print('%d:%s' % (i, objs[i]['title']))
    except (EOFError, KeyboardInterrupt):
        # End of input or Ctrl-C must end the session; swallowing them
        # would leave the loop spinning with no way to exit.
        break
    except UnicodeError:
        # Best effort: a query or title the console cannot decode/encode
        # should not kill the session, but it should not vanish silently.
        print('skipped: query or result could not be decoded/encoded')