Lucene分詞器 IKAnalyzer
阿新 • • 發佈:2019-02-08
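需要匯入的 jar（原文清單缺失，以下依程式碼中使用到的類別推斷，實際檔名依版本而定）：Lucene 核心與分析器相關套件（例如 lucene-core、lucene-analyzers-common、lucene-analyzers-smartcn）、IK Analyzer 的 jar，以及執行 @Test 所需的 JUnit。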
public class IKAnalyze { //分詞器 @Test public void test() throws Exception { //String word="a good person,Happy Every Day"; //String word="我為何不哭,因為我僅存的,就只有堅強了"; String word="中華人民共和國KWWL DRGYBN,北大老鳥,我們是"; //Analyzer analyzer = new StandardAnalyzer();//一元分詞 // Analyzer analyzer=new CJKAnalyzer(); //二元分詞 // Analyzer analyzer=new SmartChineseAnalyzer(); //智慧中文分詞 //IK分詞 Analyzer analyzer=new IKAnalyzer(true); testAnalyzer(analyzer,word); } //使用指定的分詞器對指定的文字進行分詞 public void testAnalyzer(Analyzer analyzer, String text) throws Exception { System.out.println("分詞器:" + analyzer.getClass()); StringReader reader= newStringReader(text); TokenStream tokenStream = analyzer.tokenStream("content",reader); tokenStream.reset(); CharTermAttribute cta =tokenStream.addAttribute(CharTermAttribute.class); while (tokenStream.incrementToken()) { System.out.println(cta); } reader.close(); }