1. 程式人生 > >14、Analyzer之TokenFilter

14、Analyzer之TokenFilter

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.junit.Test;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

/**
 * Created by kangz on 2016/12/16.
 */
public class AnalyzersTokenFilter {
    @Test
    public void test() throws IOException {
        String text = "Hi, Dr Wang, Mr Liu asks if you stay with Mrs Liu yesterday!";
        Analyzer analyzer = new WhitespaceAnalyzer();
        CourtesyTitleFilter filter = new CourtesyTitleFilter(analyzer.tokenStream("text", text));
        CharTermAttribute charTermAttribute = filter.addAttribute(CharTermAttribute.class);
        filter.reset();
        while (filter.incrementToken()) {
            System.out.print(charTermAttribute + " ");
        }
    }
    /**
     * 自定義詞擴充套件過濾器
     */
    class CourtesyTitleFilter extends TokenFilter {
        Map<String, String> courtesyTitleMap = new HashMap<>();
        private CharTermAttribute termAttribute;

        /**
         * 構造一個過濾給定輸入的令牌流。
         *
         * @param input
         */
        protected CourtesyTitleFilter(TokenStream input) {
            super(input);
            termAttribute = addAttribute(CharTermAttribute.class);
            courtesyTitleMap.put("Dr", "doctor");
            courtesyTitleMap.put("Mr", "mister");
            courtesyTitleMap.put("Mrs", "miss");
        }

        @Override
        public final boolean incrementToken() throws IOException {
            if (!input.incrementToken()) {
                return false;
            }
            String small = termAttribute.toString();
            if (courtesyTitleMap.containsKey(small)) {
                termAttribute.setEmpty().append(courtesyTitleMap.get(small));
            }
            return true;
        }
    }
}
//這段程式碼是參考其他部落格中的程式碼所寫的,上面已經寫得挺清楚了,所以我這裡就不再囉嗦啦

下面是小編的微信轉帳二維碼,小編再次謝謝讀者的支援,小編會更努力的

----請看下方↓↓↓↓↓↓↓

百度搜索 Drools從入門到精通:可下載開源全套Drools教程

深度Drools教程不斷更新中:


更多Drools實戰陸續釋出中………

掃描下方二維碼關注公眾號 ↓↓↓↓↓↓↓↓↓↓