LuceneUtils工具類簡單封裝
阿新 • • 發佈:2019-01-08
週六花了整整一下午,將Lucene5中有關索引的常見操作進行了簡單封裝,廢話不多說,上程式碼:
package com.yida.framework.lucene5.util;
import java.io.IOException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
/**
 * Singleton holder that hands out shared Lucene {@link IndexWriter},
 * {@link IndexReader} and {@link IndexSearcher} instances.
 *
 * <p>The cached writer, reader and searcher live in static fields, so there is
 * at most one of each per class loader. Creation of each cached object is
 * guarded by its own lock. I/O failures are printed and swallowed, so every
 * getter may return {@code null} (or a previously cached instance) when the
 * underlying Lucene call fails.
 *
 * @author Lanxiaowei
 */
public class LuceneManager {
    private volatile static LuceneManager singleton;
    private volatile static IndexWriter writer;
    private volatile static IndexReader reader;
    private volatile static IndexSearcher searcher;

    private final Lock writerLock = new ReentrantLock();
    private final Lock readerLock = new ReentrantLock();
    private final Lock searcherLock = new ReentrantLock();

    private LuceneManager() {}

    /**
     * Returns the single {@code LuceneManager}, creating it on first use
     * (double-checked locking on the volatile {@code singleton} field).
     *
     * @return the shared manager instance
     */
    public static LuceneManager getInstance() {
        if (null == singleton) {
            synchronized (LuceneManager.class) {
                if (null == singleton) {
                    singleton = new LuceneManager();
                }
            }
        }
        return singleton;
    }

    /**
     * Returns the shared {@link IndexWriter}, creating it on first call.
     *
     * <p>Once a writer has been created, later calls return that same writer and
     * ignore {@code dir} and {@code config}.
     * NOTE(review): a writer that a caller has closed stays cached here; confirm
     * whether callers rely on that before changing it.
     *
     * @param dir    index directory, must not be {@code null}
     * @param config writer configuration, must not be {@code null}
     * @return the shared writer, or {@code null} if it could not be created
     *         (the failure is printed to stderr)
     * @throws IllegalArgumentException if {@code dir} or {@code config} is {@code null}
     */
    public IndexWriter getIndexWriter(Directory dir, IndexWriterConfig config) {
        if (null == dir) {
            throw new IllegalArgumentException("Directory can not be null.");
        }
        if (null == config) {
            throw new IllegalArgumentException("IndexWriterConfig can not be null.");
        }
        // Acquire the lock before entering try, so that finally never tries to
        // release a lock this thread does not hold.
        writerLock.lock();
        try {
            if (null == writer) {
                // Fail fast when the index directory is already write-locked.
                if (IndexWriter.isLocked(dir)) {
                    throw new LockObtainFailedException("Directory of index had been locked.");
                }
                writer = new IndexWriter(dir, config);
            }
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            writerLock.unlock();
        }
        return writer;
    }

    /**
     * Returns the shared {@link IndexReader}.
     *
     * <p>A reader is opened on {@code dir} when none is cached yet or when the
     * cached one has been closed (reference count of zero). Otherwise, when
     * {@code enableNRTReader} is {@code true}, the cached reader is refreshed
     * via {@link DirectoryReader#openIfChanged(DirectoryReader)}; that call
     * yields {@code null} when the index is unchanged, in which case the cached
     * reader is kept.
     *
     * <p>A reader that gets replaced by a refreshed one is <em>not</em> closed
     * here, because callers may still be searching with it; whoever obtained it
     * remains responsible for closing it.
     *
     * @param dir             index directory, must not be {@code null}
     * @param enableNRTReader whether to refresh the cached reader so that recent
     *                        index changes become visible
     * @return the shared reader, or {@code null} if none could be opened
     *         (the failure is printed to stderr)
     * @throws IllegalArgumentException if {@code dir} is {@code null}
     */
    public IndexReader getIndexReader(Directory dir, boolean enableNRTReader) {
        if (null == dir) {
            throw new IllegalArgumentException("Directory can not be null.");
        }
        readerLock.lock();
        try {
            IndexReader current = reader;
            if (null == current || current.getRefCount() <= 0) {
                reader = DirectoryReader.open(dir);
            } else if (enableNRTReader && current instanceof DirectoryReader) {
                DirectoryReader refreshed = DirectoryReader.openIfChanged((DirectoryReader) current);
                // null means "nothing changed": keep the reader we already have.
                if (null != refreshed) {
                    reader = refreshed;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            readerLock.unlock();
        }
        return reader;
    }

    /**
     * Returns the shared {@link IndexReader} without refreshing it.
     *
     * @param dir index directory, must not be {@code null}
     * @return the shared reader, or {@code null} if none could be opened
     */
    public IndexReader getIndexReader(Directory dir) {
        return getIndexReader(dir, false);
    }

    /**
     * Returns an {@link IndexSearcher} over {@code reader}.
     *
     * <p>The searcher is cached and reused for as long as the same reader
     * instance is passed in; a different reader instance produces (and caches)
     * a new searcher. {@code executor} is only applied when a new searcher is
     * built.
     *
     * @param reader   reader to search, must not be {@code null}
     * @param executor executor for multi-threaded searching, or {@code null}
     *                 for single-threaded searching
     * @return a searcher bound to {@code reader}
     * @throws IllegalArgumentException if {@code reader} is {@code null}
     */
    public IndexSearcher getIndexSearcher(IndexReader reader, ExecutorService executor) {
        if (null == reader) {
            throw new IllegalArgumentException("The indexReader can not be null.");
        }
        searcherLock.lock();
        try {
            IndexSearcher current = searcher;
            if (null == current || current.getIndexReader() != reader) {
                current = new IndexSearcher(reader, executor);
                searcher = current;
            }
            return current;
        } finally {
            searcherLock.unlock();
        }
    }

    /**
     * Returns a single-threaded {@link IndexSearcher} over {@code reader}.
     *
     * @param reader reader to search, must not be {@code null}
     * @return a searcher bound to {@code reader}
     */
    public IndexSearcher getIndexSearcher(IndexReader reader) {
        return getIndexSearcher(reader, null);
    }
}
package com.yida.framework.lucene5.util;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import org.ansj.lucene5.AnsjAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
* Lucene工具類(基於Lucene5.0封裝)
* @author Lanxiaowei
*
*/
public class LuceneUtils {
private static final LuceneManager luceneManager = LuceneManager.getInstance();
private static Analyzer analyzer = new AnsjAnalyzer();
/**
* 開啟索引目錄
*
* @param luceneDir
* @return
* @throws IOException
*/
public static FSDirectory openFSDirectory(String luceneDir) {
FSDirectory directory = null;
try {
directory = FSDirectory.open(Paths.get(luceneDir));
/**
* 注意:isLocked方法內部會試圖去獲取Lock,如果獲取到Lock,會關閉它,否則return false表示索引目錄沒有被鎖,
* 這也就是為什麼unlock方法被從IndexWriter類中移除的原因
*/
IndexWriter.isLocked(directory);
} catch (IOException e) {
e.printStackTrace();
}
return directory;
}
/**
* 關閉索引目錄並銷燬
* @param directory
* @throws IOException
*/
public static void closeDirectory(Directory directory) throws IOException {
if (null != directory) {
directory.close();
directory = null;
}
}
/**
* 獲取IndexWriter
* @param dir
* @param config
* @return
*/
public static IndexWriter getIndexWrtier(Directory dir, IndexWriterConfig config) {
return luceneManager.getIndexWriter(dir, config);
}
/**
* 獲取IndexWriter
* @param dir
* @param config
* @return
*/
public static IndexWriter getIndexWrtier(String directoryPath, IndexWriterConfig config) {
FSDirectory directory = openFSDirectory(directoryPath);
return luceneManager.getIndexWriter(directory, config);
}
/**
* 獲取IndexReader
* @param dir
* @param enableNRTReader 是否開啟NRTReader
* @return
*/
public static IndexReader getIndexReader(Directory dir,boolean enableNRTReader) {
return luceneManager.getIndexReader(dir, enableNRTReader);
}
/**
* 獲取IndexReader(預設不啟用NRTReader)
* @param dir
* @return
*/
public static IndexReader getIndexReader(Directory dir) {
return luceneManager.getIndexReader(dir);
}
/**
* 獲取IndexSearcher
* @param reader IndexReader物件
* @param executor 如果你需要開啟多執行緒查詢,請提供ExecutorService物件引數
* @return
*/
public static IndexSearcher getIndexSearcher(IndexReader reader,ExecutorService executor) {
return luceneManager.getIndexSearcher(reader, executor);
}
/**
* 獲取IndexSearcher(不支援多執行緒查詢)
* @param reader IndexReader物件
* @return
*/
public static IndexSearcher getIndexSearcher(IndexReader reader) {
return luceneManager.getIndexSearcher(reader);
}
/**
* 建立QueryParser物件
* @param field
* @param analyzer
* @return
*/
public static QueryParser createQueryParser(String field, Analyzer analyzer) {
return new QueryParser(field, analyzer);
}
/**
* 關閉IndexReader
* @param reader
*/
public static void closeIndexReader(IndexReader reader) {
if (null != reader) {
try {
reader.close();
reader = null;
} catch (IOException e) {
e.printStackTrace();
}
}
}
/**
* 關閉IndexWriter
* @param writer
*/
public static void closeIndexWriter(IndexWriter writer) {
if(null != writer) {
try {
writer.close();
writer = null;
} catch (IOException e) {
e.printStackTrace();
}
}
}
/**
* 關閉IndexReader和IndexWriter
* @param reader
* @param writer
*/
public static void closeAll(IndexReader reader, IndexWriter writer) {
closeIndexReader(reader);
closeIndexWriter(writer);
}
/**
* 刪除索引[注意:請自己關閉IndexWriter物件]
* @param writer
* @param field
* @param value
*/
public static void deleteIndex(IndexWriter writer, String field, String value) {
try {
writer.deleteDocuments(new Term[] {new Term(field,value)});
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* 刪除索引[注意:請自己關閉IndexWriter物件]
* @param writer
* @param query
*/
public static void deleteIndex(IndexWriter writer, Query query) {
try {
writer.deleteDocuments(query);
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* 批量刪除索引[注意:請自己關閉IndexWriter物件]
* @param writer
* @param terms
*/
public static void deleteIndexs(IndexWriter writer,Term[] terms) {
try {
writer.deleteDocuments(terms);
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* 批量刪除索引[注意:請自己關閉IndexWriter物件]
* @param writer
* @param querys
*/
public static void deleteIndexs(IndexWriter writer,Query[] querys) {
try {
writer.deleteDocuments(querys);
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* 刪除所有索引文件
* @param writer
*/
public static void deleteAllIndex(IndexWriter writer) {
try {
writer.deleteAll();
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* 更新索引文件
* @param writer
* @param term
* @param document
*/
public static void updateIndex(IndexWriter writer,Term term,Document document) {
try {
writer.updateDocument(term, document);
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* 更新索引文件
* @param writer
* @param term
* @param document
*/
public static void updateIndex(IndexWriter writer,String field,String value,Document document) {
updateIndex(writer, new Term(field, value), document);
}
/**
* 新增索引文件
* @param writer
* @param doc
*/
public static void addIndex(IndexWriter writer, Document document) {
updateIndex(writer, null, document);
}
/**
* 索引文件查詢
* @param searcher
* @param query
* @return
*/
public static List<Document> query(IndexSearcher searcher,Query query) {
TopDocs topDocs = null;
try {
topDocs = searcher.search(query, Integer.MAX_VALUE);
} catch (IOException e) {
e.printStackTrace();
}
ScoreDoc[] scores = topDocs.scoreDocs;
int length = scores.length;
if (length <= 0) {
return Collections.emptyList();
}
List<Document> docList = new ArrayList<Document>();
try {
for (int i = 0; i < length; i++) {
Document doc = searcher.doc(scores[i].doc);
docList.add(doc);
}
} catch (IOException e) {
e.printStackTrace();
}
return docList;
}
/**
* 返回索引文件的總數[注意:請自己手動關閉IndexReader]
* @param reader
* @return
*/
public static int getIndexTotalCount(IndexReader reader) {
return reader.numDocs();
}
/**
* 返回索引文件中最大文件ID[注意:請自己手動關閉IndexReader]
* @param reader
* @return
*/
public static int getMaxDocId(IndexReader reader) {
return reader.maxDoc();
}
/**
* 返回已經刪除尚未提交的文件總數[注意:請自己手動關閉IndexReader]
* @param reader
* @return
*/
public static int getDeletedDocNum(IndexReader reader) {
return getMaxDocId(reader) - getIndexTotalCount(reader);
}
/**
* 根據docId查詢索引文件
* @param reader IndexReader物件
* @param docID documentId
* @param fieldsToLoad 需要返回的field
* @return
*/
public static Document findDocumentByDocId(IndexReader reader,int docID, Set<String> fieldsToLoad) {
try {
return reader.document(docID, fieldsToLoad);
} catch (IOException e) {
return null;
}
}
/**
* 根據docId查詢索引文件
* @param reader IndexReader物件
* @param docID documentId
* @return
*/
public static Document findDocumentByDocId(IndexReader reader,int docID) {
return findDocumentByDocId(reader, docID, null);
}
/**
* @Title: createHighlighter
* @Description: 建立高亮器
* @param query 索引查詢物件
* @param prefix 高亮字首字串
* @param stuffix 高亮字尾字串
* @param fragmenterLength 摘要最大長度
* @return
*/
public static Highlighter createHighlighter(Query query, String prefix, String stuffix, int fragmenterLength) {
Formatter formatter = new SimpleHTMLFormatter((prefix == null || prefix.trim().length() == 0) ?
"<font color=\"red\">" : prefix, (stuffix == null || stuffix.trim().length() == 0)?"</font>" : stuffix);
Scorer fragmentScorer = new QueryScorer(query);
Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
Fragmenter fragmenter = new SimpleFragmenter(fragmenterLength <= 0 ? 50 : fragmenterLength);
highlighter.setTextFragmenter(fragmenter);
return highlighter;
}
/**
* @Title: highlight
* @Description: 生成高亮文字
* @param document 索引文件物件
* @param highlighter 高亮器
* @param analyzer 索引分詞器
* @param field 高亮欄位
* @return
* @throws IOException
* @throws InvalidTokenOffsetsException
*/
public static String highlight(Document document,Highlighter highlighter,Analyzer analyzer,String field) throws IOException {
List<IndexableField> list = document.getFields();
for (IndexableField fieldable : list) {
String fieldValue = fieldable.stringValue();
if(fieldable.name().equals(field)) {
try {
fieldValue = highlighter.getBestFragment(analyzer, field, fieldValue);
} catch (InvalidTokenOffsetsException e) {
fieldValue = fieldable.stringValue();
}
return (fieldValue == null || fieldValue.trim().length() == 0)? fieldable.stringValue() : fieldValue;
}
}
return null;
}
/**
* @Title: searchTotalRecord
* @Description: 獲取符合條件的總記錄數
* @param query
* @return
* @throws IOException
*/
public static int searchTotalRecord(IndexSearcher search,Query query) {
ScoreDoc[] docs = null;
try {
TopDocs topDocs = search.search(query, Integer.MAX_VALUE);
if(topDocs == null || topDocs.scoreDocs == null || topDocs.scoreDocs.length == 0) {
return 0;
}
docs = topDocs.scoreDocs;
} catch (IOException e) {
e.printStackTrace();
}
return docs.length;
}
/**
* @Title: pageQuery
* @Description: Lucene分頁查詢
* @param searcher
* @param query
* @param page
* @throws IOException
*/
public static void pageQuery(IndexSearcher searcher,Directory directory,Query query,Page<Document> page) {
int totalRecord = searchTotalRecord(searcher,query);
//設定總記錄數
page.setTotalRecord(totalRecord);
TopDocs topDocs = null;
try {
topDocs = searcher.searchAfter(page.getAfterDoc(),query, page.getPageSize());
} catch (IOException e) {
e.printStackTrace();
}
List<Document> docList = new ArrayList<Document>();
ScoreDoc[] docs = topDocs.scoreDocs;
int index = 0;
for (ScoreDoc scoreDoc : docs) {
int docID = scoreDoc.doc;
Document document = null;
try {
document = searcher.doc(docID);
} catch (IOException e) {
e.printStackTrace();
}
if(index == docs.length - 1) {
page.setAfterDoc(scoreDoc);
page.setAfterDocId(docID);
}
docList.add(document);
index++;
}
page.setItems(docList);
closeIndexReader(searcher.getIndexReader());
}
/**
* @Title: pageQuery
* @Description: 分頁查詢[如果設定了高亮,則會更新索引文件]
* @param searcher
* @param directory
* @param query
* @param page
* @param highlighterParam
* @param writerConfig
* @throws IOException
*/
public static void pageQuery(IndexSearcher searcher,Directory directory,Query query,Page<Document> page,HighlighterParam highlighterParam,IndexWriterConfig writerConfig) throws IOException {
IndexWriter writer = null;
//若未設定高亮
if(null == highlighterParam || !highlighterParam.isHighlight()) {
pageQuery(searcher,directory,query, page);
} else {
int totalRecord = searchTotalRecord(searcher,query);
System.out.println("totalRecord:" + totalRecord);
//設定總記錄數
page.setTotalRecord(totalRecord);
TopDocs topDocs = searcher.searchAfter(page.getAfterDoc(),query, page.getPageSize());
List<Document> docList = new ArrayList<Document>();
ScoreDoc[] docs = topDocs.scoreDocs;
int index = 0;
writer = getIndexWrtier(directory, writerConfig);
for (ScoreDoc scoreDoc : docs) {
int docID = scoreDoc.doc;
Document document = searcher.doc(docID);
String content = document.get(highlighterParam.getFieldName());
if(null != content && content.trim().length() > 0) {
//建立高亮器
Highlighter highlighter = LuceneUtils.createHighlighter(query,
highlighterParam.getPrefix(), highlighterParam.getStuffix(),
highlighterParam.getFragmenterLength());
String text = highlight(document, highlighter, analyzer, highlighterParam.getFieldName());
//若高亮後跟原始文字不相同,表示高亮成功
if(!text.equals(content)) {
Document tempdocument = new Document();
List<IndexableField> indexableFieldList = document.getFields();
if(null != indexableFieldList && indexableFieldList.size() > 0) {
for(IndexableField field : indexableFieldList) {
if(field.name().equals(highlighterParam.getFieldName())) {
tempdocument.add(new TextField(field.name(), text, Field.Store.YES));
} else {
tempdocument.add(field);
}
}
}
updateIndex(writer, new Term(highlighterParam.getFieldName(),content), tempdocument);
document = tempdocument;
}
}
if(index == docs.length - 1) {
page.setAfterDoc(scoreDoc);
page.setAfterDocId(docID);
}
docList.add(document);
index++;
}
page.setItems(docList);
}
closeIndexReader(searcher.getIndexReader());
closeIndexWriter(writer);
}
}
package com.yida.framework.lucene5.util;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.ScoreDoc;
public class Page<T> {
/**當前第幾頁(從1開始計算)*/
private int currentPage;
/**每頁顯示幾條*/
private int pageSize;
/**總記錄數*/
private int totalRecord;
/**總頁數*/
private int totalPage;
/**分頁資料集合[用泛型T來限定集合元素型別]*/
private Collection<T> items;
/**當前顯示起始索引(從零開始計算)*/
private int startIndex;
/**當前顯示結束索引(從零開始計算)*/
private int endIndex;
/**一組最多顯示幾個頁碼[比如Google一組最多顯示10個頁碼]*/
private int groupSize;
/**左邊偏移量*/
private int leftOffset = 5;
/**右邊偏移量*/
private int rightOffset = 4;
/**當前頁碼範圍*/
private String[] pageRange;
/**分頁資料*/
private List<Document> docList;
/**上一頁最後一個ScoreDoc物件*/
private ScoreDoc afterDoc;
/**上一頁最後一個ScoreDoc物件的Document物件ID*/
private int afterDocId;
public void setRangeIndex() {
int groupSize = getGroupSize();
int totalPage = getTotalPage();
if(totalPage < 2) {
startIndex = 0;
endIndex = totalPage - startIndex;
} else {
int currentPage = getCurrentPage();