Lucene 分詞器的簡單應用例項
阿新 • • 發佈:2018-12-18
方法呼叫流程圖:
第一步:呼叫tokenStream方法:
第二步:呼叫createComponents方法生成TokenStreamComponents
第三步:在incrementToken方法中執行分詞的具體邏輯,並把分好的詞放在自定義的Attribute中
程式碼實現:
package com.shidebin.lucence.lucence_quickStart;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;

/**
 * Minimal custom Lucene analyzer used to demonstrate the analysis call chain.
 *
 * <ol>
 *   <li>{@code MyAnalysis} extends {@link Analyzer} and implements
 *       {@link #createComponents(String)}.</li>
 *   <li>{@code createComponents} builds a {@code TokenStreamComponents} from two
 *       parts: the source ({@link MyTokenizer}) and the sink ({@link MyTokenFilter}).
 *       A {@link Tokenizer} is itself a {@link TokenStream}, and a filter wraps
 *       another {@code TokenStream} (decorator style).</li>
 *   <li>The tokenizer splits the input on whitespace and publishes each word
 *       through the custom {@link Myattribute}.</li>
 * </ol>
 */
public class MyAnalysis extends Analyzer {

    /**
     * Builds the tokenizer/filter chain for a field.
     *
     * @param fieldName name of the field being analyzed (not used by this analyzer)
     * @return components whose source is a {@link MyTokenizer} and whose resulting
     *         stream is a {@link MyTokenFilter} wrapping that tokenizer
     */
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        MyTokenizer source = new MyTokenizer();
        TokenStream sink = new MyTokenFilter(source);
        // Hand back the filter as the end of the chain; returning only the source
        // would silently bypass the filter.
        return new TokenStreamComponents(source, sink);
    }

    /**
     * Tokenizer that emits one token per run of non-whitespace characters.
     */
    public static class MyTokenizer extends Tokenizer {

        /** Attribute instance through which the current word is exposed to consumers. */
        private final Myattribute wordAttribute = addAttribute(Myattribute.class);

        /**
         * Reads the next whitespace-delimited word from the input.
         *
         * <p>Declared {@code final} because Lucene's {@code TokenStream} contract
         * requires it (the check runs when Java assertions are enabled).
         *
         * @return {@code true} if a word was read and stored in the attribute,
         *         {@code false} when the input is exhausted
         * @throws IOException if reading from the underlying reader fails
         */
        @Override
        public final boolean incrementToken() throws IOException {
            // Reset all attributes before producing the next token.
            clearAttributes();

            // Growable buffer: tokens of any length are accepted.
            StringBuilder word = new StringBuilder();
            while (true) {
                int read = this.input.read();
                if (read == -1) {
                    // End of input: whatever has been collected is the last token.
                    break;
                }
                char c = (char) read;
                if (Character.isWhitespace(c)) {
                    if (word.length() > 0) {
                        // Whitespace after at least one character terminates the word.
                        break;
                    }
                    // Leading or repeated whitespace: skip it instead of emitting
                    // an empty token.
                    continue;
                }
                word.append(c);
            }

            if (word.length() == 0) {
                return false;
            }
            wordAttribute.genAttribute(word.toString().toCharArray());
            return true;
        }
    }

    /**
     * Filter stage placed after {@link MyTokenizer}. It currently passes tokens
     * through unchanged; post-processing of each token (for example converting
     * upper case to lower case) belongs in {@link #incrementToken()}.
     */
    public static class MyTokenFilter extends TokenFilter {

        /**
         * @param input the upstream token stream to wrap
         */
        protected MyTokenFilter(TokenStream input) {
            super(input);
        }

        /**
         * Advances the wrapped stream by one token.
         *
         * @return whatever the wrapped stream returns
         * @throws IOException if the wrapped stream fails
         */
        @Override
        public final boolean incrementToken() throws IOException {
            // Delegate to the upstream stream; attributes are shared with it, so
            // any per-token transformation would be applied here after this call.
            return input.incrementToken();
        }
    }

    /**
     * Custom attribute holding the text of the current token.
     */
    public static interface Myattribute extends Attribute {

        /**
         * Stores the given characters as the current token text.
         *
         * @param word characters of the token
         */
        void genAttribute(char[] word);

        /**
         * @return the current token text, or {@code null} if none has been set
         *         since the last clear
         */
        String getAttribute();
    }

    /**
     * Implementation of {@link Myattribute}. The class name is the interface name
     * plus {@code Impl}, which is how Lucene's default attribute factory looks up
     * the implementation, so it must not be renamed independently.
     */
    public static class MyattributeImpl extends AttributeImpl implements Myattribute {

        private String attribute;

        /** Drops the stored token text so stale values do not leak into the next token. */
        @Override
        public void clear() {
            this.attribute = null;
        }

        /** Reports the stored token text to the reflector under the key {@code "attribute"}. */
        @Override
        public void reflectWith(AttributeReflector reflector) {
            reflector.reflect(Myattribute.class, "attribute", this.attribute);
        }

        /**
         * Copies the stored token text into {@code target}.
         *
         * @param target destination; expected to be a {@code MyattributeImpl}
         */
        @Override
        public void copyTo(AttributeImpl target) {
            ((MyattributeImpl) target).attribute = this.attribute;
        }

        @Override
        public void genAttribute(char[] word) {
            this.attribute = new String(word).trim();
        }

        @Override
        public String getAttribute() {
            return this.attribute;
        }
    }

    /**
     * Demo entry point: tokenizes a sample sentence and prints each token
     * followed by {@code |}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String text = "fasfas ERIKNI fasf FASDFdfsd FASFJKL jfkadsjfakl";
        // Both the analyzer and the token stream are closeable; try-with-resources
        // releases them even if tokenization throws.
        try (MyAnalysis ana = new MyAnalysis();
                TokenStream tokenStream = ana.tokenStream("abc", text)) {
            Myattribute attribute = tokenStream.getAttribute(Myattribute.class);
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                System.out.print(attribute.getAttribute() + "|");
            }
            tokenStream.end();
            System.out.println();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}