Lucene中文分詞以及關鍵字的高亮
阿新 • • 發佈:2019-02-04
本文介紹如何讓搜尋結果中的關鍵字在頁面上高亮顯示, 以及如何使用適合中文內容的分詞器 (SmartChineseAnalyzer).
需要引用的 jar 請參考 Lucene 第一章的 pom.xml.
package com.zero.lucene;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene .document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache .lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter ;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Searches a Lucene index on the {@code title} field using a
 * {@link SmartChineseAnalyzer} and prints each matching title with the
 * matched keywords wrapped in bold red HTML markup.
 *
 * @author samuel
 */
public class SearcherColor {

    /** Name of the stored field that is both searched and highlighted. */
    private static final String TITLE_FIELD = "title";

    /** Index location used by {@link #searcher()}; the empty path resolves against the working directory. */
    private static final String DEFAULT_INDEX_DIR = "";

    /** Query text used by {@link #searcher()}. */
    private static final String DEFAULT_QUERY = "pwd.txt";

    /** Maximum number of hits requested from the index. */
    private static final int MAX_HITS = 10;

    /**
     * Runs the search with the default index directory and query text.
     *
     * @throws Exception if the index cannot be opened, the query cannot be
     *                   parsed, or highlighting fails
     */
    public void searcher() throws Exception {
        searcher(DEFAULT_INDEX_DIR, DEFAULT_QUERY);
    }

    /**
     * Searches the index at {@code indexDir} for {@code queryText} on the
     * {@code title} field and prints one line per hit that has a stored title.
     * The line is the best highlighted fragment, or the raw title when the
     * highlighter returns no fragment.
     *
     * @param indexDir  file-system path of the Lucene index directory
     * @param queryText query string in the classic query parser syntax
     * @throws Exception if the index cannot be opened, the query cannot be
     *                   parsed, or highlighting fails
     */
    public void searcher(String indexDir, String queryText) throws Exception {
        // Directory, reader and analyzer are all Closeable; release them even on failure.
        try (Directory directory = FSDirectory.open(Paths.get(indexDir));
             IndexReader indexReader = DirectoryReader.open(directory);
             Analyzer analyzer = new SmartChineseAnalyzer()) {

            IndexSearcher indexSearcher = new IndexSearcher(indexReader);

            // Chinese word segmentation is applied to the query text by the analyzer.
            QueryParser parser = new QueryParser(TITLE_FIELD, analyzer);
            Query query = parser.parse(queryText);
            TopDocs topDocs = indexSearcher.search(query, MAX_HITS);

            // Highlighting setup: score fragments against the same query.
            QueryScorer queryScorer = new QueryScorer(query);
            // Markup placed around every matched keyword (bold, red).
            Formatter formatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
            Highlighter highlighter = new Highlighter(formatter, queryScorer);
            Fragmenter fragmenter = new SimpleSpanFragmenter(queryScorer);
            highlighter.setTextFragmenter(fragmenter);

            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document doc = indexReader.document(scoreDoc.doc);
                String title = doc.get(TITLE_FIELD);
                if (title == null) {
                    continue;
                }
                // The token stream must be produced from the very text being
                // highlighted; otherwise token offsets do not line up with it.
                TokenStream tokenStream = analyzer.tokenStream(TITLE_FIELD, title);
                String bestSpan = highlighter.getBestFragment(tokenStream, title);
                // No fragment returned: fall back to the unhighlighted title.
                System.out.println(bestSpan != null ? bestSpan : title);
            }
        }
    }
}