程式人生 > Lucene5學習之LuceneUtils工具類簡單封裝

Lucene5學習之LuceneUtils工具類簡單封裝

週六花了整整一下午,將Lucene5中有關索引的常見操作進行了簡單封裝,廢話不多說,上程式碼:

package com.yida.framework.lucene5.util;

import java.io.IOException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.lucene.index.DirectoryReader;
import
org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.store.Directory; import org.apache.lucene.store.LockObtainFailedException; /** * Lucene索引讀寫器/查詢器單例獲取工具類 * @author
Lanxiaowei * */
public class LuceneManager { private volatile static LuceneManager singleton; private volatile static IndexWriter writer; private volatile static IndexReader reader; private volatile static IndexSearcher searcher; private final Lock writerLock = new ReentrantLock(); //private final Lock readerLock = new ReentrantLock();
//private final Lock searcherLock = new ReentrantLock(); private LuceneManager() {} public static LuceneManager getInstance() { if (null == singleton) { synchronized (LuceneManager.class) { if (null == singleton) { singleton = new LuceneManager(); } } } return singleton; } /** * 獲取IndexWriter單例物件 * @param dir * @param config * @return */ public IndexWriter getIndexWriter(Directory dir, IndexWriterConfig config) { if(null == dir) { throw new IllegalArgumentException("Directory can not be null."); } if(null == config) { throw new IllegalArgumentException("IndexWriterConfig can not be null."); } try { writerLock.lock(); if(null == writer){ //如果索引目錄被鎖,則直接拋異常 if(IndexWriter.isLocked(dir)) { throw new LockObtainFailedException("Directory of index had been locked."); } writer = new IndexWriter(dir, config); } } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { writerLock.unlock(); } return writer; } /** * 獲取IndexReader物件 * @param dir * @param enableNRTReader 是否開啟NRTReader * @return */ public IndexReader getIndexReader(Directory dir,boolean enableNRTReader) { if(null == dir) { throw new IllegalArgumentException("Directory can not be null."); } try { if(null == reader){ reader = DirectoryReader.open(dir); } else { if(enableNRTReader && reader instanceof DirectoryReader) { //開啟近實時Reader,能立即看到動態新增/刪除的索引變化 reader = DirectoryReader.openIfChanged((DirectoryReader)reader); } } } catch (IOException e) { e.printStackTrace(); } return reader; } /** * 獲取IndexReader物件(預設不啟用NETReader) * @param dir * @return */ public IndexReader getIndexReader(Directory dir) { return getIndexReader(dir, false); } /** * 獲取IndexSearcher物件 * @param reader IndexReader物件例項 * @param executor 如果你需要開啟多執行緒查詢,請提供ExecutorService物件引數 * @return */ public IndexSearcher getIndexSearcher(IndexReader reader,ExecutorService executor) { if(null == reader) { throw new IllegalArgumentException("The indexReader can not be null."); } if(null == searcher){ 
searcher = new IndexSearcher(reader); } return searcher; } /** * 獲取IndexSearcher物件(不支援多執行緒查詢) * @param reader IndexReader物件例項 * @return */ public IndexSearcher getIndexSearcher(IndexReader reader) { return getIndexSearcher(reader, null); } }
package com.yida.framework.lucene5.util;

import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ExecutorService;

import org.ansj.lucene5.AnsjAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Lucene工具類(基於Lucene5.0封裝)
 * @author Lanxiaowei
 *
 */
public class LuceneUtils {
private static final LuceneManager luceneManager = LuceneManager.getInstance();
private static Analyzer analyzer = new AnsjAnalyzer();

/**
 * Opens the file-system index directory at the given path.
 *
 * @param luceneDir path of the index directory
 * @return the directory, or {@code null} if it could not be opened (the
 *         IOException's stack trace is printed)
 */
public static FSDirectory openFSDirectory(String luceneDir) {
    FSDirectory opened = null;
    try {
        opened = FSDirectory.open(Paths.get(luceneDir));
        // Probe the index lock; the result is deliberately discarded.
        // Original author's note: isLocked itself tries to obtain the lock
        // and releases it when it succeeds, which is why IndexWriter no
        // longer offers an unlock method.
        IndexWriter.isLocked(opened);
    } catch (IOException e) {
        e.printStackTrace();
    }
    return opened;
}

/**
 * Closes the given index directory; a null argument is ignored.
 *
 * @param directory directory to close, may be null
 * @throws IOException if closing fails
 */
public static void closeDirectory(Directory directory) throws IOException {
    if (directory == null) {
        return;
    }
    directory.close();
}

/**
 * Obtains an IndexWriter by delegating to
 * {@code luceneManager.getIndexWriter(dir, config)}.
 *
 * @param dir    index directory
 * @param config writer configuration
 * @return whatever the manager returns for these arguments
 */
public static IndexWriter getIndexWrtier(Directory dir, IndexWriterConfig config) {
return luceneManager.getIndexWriter(dir, config);
}

/**
 * Obtains an IndexWriter for the index stored at a file-system path.
 * The path is opened with {@link #openFSDirectory(String)} and the result
 * is handed to {@code luceneManager.getIndexWriter}.
 *
 * @param directoryPath path of the index directory
 * @param config        writer configuration
 * @return whatever the manager returns for these arguments
 */
public static IndexWriter getIndexWrtier(String directoryPath, IndexWriterConfig config) {
FSDirectory directory = openFSDirectory(directoryPath);
return luceneManager.getIndexWriter(directory, config);
}

/**
 * Obtains an IndexReader by delegating to
 * {@code luceneManager.getIndexReader(dir, enableNRTReader)}.
 *
 * @param dir             index directory
 * @param enableNRTReader whether near-real-time reading is requested;
 *                        passed through to the manager unchanged
 * @return whatever the manager returns for these arguments
 */
public static IndexReader getIndexReader(Directory dir,boolean enableNRTReader) {
return luceneManager.getIndexReader(dir, enableNRTReader);
}

/**
 * Obtains an IndexReader by delegating to
 * {@code luceneManager.getIndexReader(dir)} (the overload without the
 * near-real-time flag).
 *
 * @param dir index directory
 * @return whatever the manager returns for this directory
 */
public static IndexReader getIndexReader(Directory dir) {
return luceneManager.getIndexReader(dir);
}

/**
 * Obtains an IndexSearcher by delegating to
 * {@code luceneManager.getIndexSearcher(reader, executor)}.
 *
 * @param reader   reader the searcher should work on
 * @param executor executor intended for multi-threaded search; forwarded
 *                 to the manager unchanged
 * @return whatever the manager returns for these arguments
 */
public static IndexSearcher getIndexSearcher(IndexReader reader,ExecutorService executor) {
return luceneManager.getIndexSearcher(reader, executor);
}

/**
 * Obtains an IndexSearcher by delegating to
 * {@code luceneManager.getIndexSearcher(reader)} (no executor supplied).
 *
 * @param reader reader the searcher should work on
 * @return whatever the manager returns for this reader
 */
public static IndexSearcher getIndexSearcher(IndexReader reader) {
return luceneManager.getIndexSearcher(reader);
}

/**
 * Creates a new QueryParser for the given field and analyzer.
 *
 * @param field    field name passed to the QueryParser constructor
 * @param analyzer analyzer passed to the QueryParser constructor
 * @return a new QueryParser instance
 */
public static QueryParser createQueryParser(String field, Analyzer analyzer) {
return new QueryParser(field, analyzer);
}

/**
 * Closes the given IndexReader; a null argument is ignored. An IOException
 * raised while closing is not propagated: its stack trace is printed.
 *
 * @param reader reader to close, may be null
 */
public static void closeIndexReader(IndexReader reader) {
    if (reader == null) {
        return;
    }
    try {
        reader.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}

/**
 * Closes the given IndexWriter; a null argument is ignored. An IOException
 * raised while closing is not propagated: its stack trace is printed.
 *
 * @param writer writer to close, may be null
 */
public static void closeIndexWriter(IndexWriter writer) {
    if (writer == null) {
        return;
    }
    try {
        writer.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}

/**
 * Closes an IndexReader and an IndexWriter, in that order, via
 * {@link #closeIndexReader(IndexReader)} and
 * {@link #closeIndexWriter(IndexWriter)}.
 *
 * @param reader reader to close
 * @param writer writer to close
 */
public static void closeAll(IndexReader reader, IndexWriter writer) {
closeIndexReader(reader);
closeIndexWriter(writer);
}

/**
 * Deletes the documents that contain the term {@code field:value}.
 * The writer is not closed here; the caller must close it. An IOException
 * is not propagated: its stack trace is printed.
 *
 * @param writer writer used to perform the deletion
 * @param field  field name of the term
 * @param value  term text
 */
public static void deleteIndex(IndexWriter writer, String field, String value) {
    try {
        Term target = new Term(field, value);
        writer.deleteDocuments(target);
    } catch (IOException e) {
        e.printStackTrace();
    }
}

/**
 * Deletes the documents matched by the given query.
 * The writer is not closed here; the caller must close it. An IOException
 * is not propagated: its stack trace is printed.
 *
 * @param writer writer used to perform the deletion
 * @param query  query passed to {@code writer.deleteDocuments}
 */
public static void deleteIndex(IndexWriter writer, Query query) {
try {
writer.deleteDocuments(query);
} catch (IOException e) {
e.printStackTrace();
}
}

/**
 * Batch deletion by terms: passes the whole array to
 * {@code writer.deleteDocuments}.
 * The writer is not closed here; the caller must close it. An IOException
 * is not propagated: its stack trace is printed.
 *
 * @param writer writer used to perform the deletion
 * @param terms  terms passed to {@code writer.deleteDocuments}
 */
public static void deleteIndexs(IndexWriter writer,Term[] terms) {
try {
writer.deleteDocuments(terms);
} catch (IOException e) {
e.printStackTrace();
}
}

/**
 * Batch deletion by queries: passes the whole array to
 * {@code writer.deleteDocuments}.
 * The writer is not closed here; the caller must close it. An IOException
 * is not propagated: its stack trace is printed.
 *
 * @param writer writer used to perform the deletion
 * @param querys queries passed to {@code writer.deleteDocuments}
 */
public static void deleteIndexs(IndexWriter writer,Query[] querys) {
try {
writer.deleteDocuments(querys);
} catch (IOException e) {
e.printStackTrace();
}
}

/**
 * Deletes every document by calling {@code writer.deleteAll()}.
 * An IOException is not propagated: its stack trace is printed.
 *
 * @param writer writer used to perform the deletion
 */
public static void deleteAllIndex(IndexWriter writer) {
try {
writer.deleteAll();
} catch (IOException e) {
e.printStackTrace();
}
}

/**
 * Updates an indexed document by calling
 * {@code writer.updateDocument(term, document)}.
 * An IOException is not propagated: its stack trace is printed.
 *
 * @param writer   writer used to perform the update
 * @param term     term identifying the document(s) to replace
 * @param document replacement document
 */
public static void updateIndex(IndexWriter writer,Term term,Document document) {
try {
writer.updateDocument(term, document);
} catch (IOException e) {
e.printStackTrace();
}
}

/**
 * Updates an indexed document identified by a field/value pair.
 * Builds {@code new Term(field, value)} and delegates to
 * {@link #updateIndex(IndexWriter, Term, Document)}.
 *
 * @param writer   writer used to perform the update
 * @param field    field name of the identifying term
 * @param value    text of the identifying term
 * @param document replacement document
 */
public static void updateIndex(IndexWriter writer,String field,String value,Document document) {
updateIndex(writer, new Term(field, value), document);
}

/**
 * Adds a document to the index. Implemented as
 * {@link #updateIndex(IndexWriter, Term, Document)} with a null term.
 *
 * @param writer   writer used to add the document
 * @param document document to add
 */
public static void addIndex(IndexWriter writer, Document document) {
updateIndex(writer, null, document);
}

/**
 * Runs a query and loads every matching document.
 *
 * @param searcher searcher to run the query on
 * @param query    query to execute
 * @return the matching documents in hit order; an empty list when nothing
 *         matches or the search itself fails. If loading a document fails,
 *         the documents loaded so far are returned. IOExceptions are not
 *         propagated: their stack traces are printed.
 */
public static List<Document> query(IndexSearcher searcher,Query query) {
    ScoreDoc[] hits;
    try {
        hits = searcher.search(query, Integer.MAX_VALUE).scoreDocs;
    } catch (IOException e) {
        // Previously execution continued and dereferenced a null TopDocs.
        e.printStackTrace();
        return Collections.emptyList();
    }
    if (hits == null || hits.length == 0) {
        return Collections.emptyList();
    }
    List<Document> docList = new ArrayList<Document>(hits.length);
    try {
        for (ScoreDoc hit : hits) {
            docList.add(searcher.doc(hit.doc));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return docList;
}

/**
 * Returns the number of documents reported by {@code reader.numDocs()}.
 * The reader is not closed here; the caller must close it.
 *
 * @param reader reader to query
 * @return {@code reader.numDocs()}
 */
public static int getIndexTotalCount(IndexReader reader) {
return reader.numDocs();
}

/**
 * Returns {@code reader.maxDoc()}.
 * The reader is not closed here; the caller must close it.
 * NOTE(review): Lucene documents maxDoc() as one greater than the largest
 * possible document number, so despite this method's name the value is
 * presumably not itself a valid document ID - confirm against callers.
 *
 * @param reader reader to query
 * @return {@code reader.maxDoc()}
 */
public static int getMaxDocId(IndexReader reader) {
return reader.maxDoc();
}

/**
 * Returns the number of deleted documents still counted by the reader,
 * computed as {@code maxDoc() - numDocs()}.
 * The reader is not closed here; the caller must close it.
 *
 * @param reader reader to query
 * @return difference between the reader's maxDoc and numDocs
 */
public static int getDeletedDocNum(IndexReader reader) {
    int upperBound = reader.maxDoc();
    int liveDocs = reader.numDocs();
    return upperBound - liveDocs;
}

/**
 * Loads a document by its document ID, restricted to the given fields.
 *
 * @param reader       reader to load the document from
 * @param docID        document ID
 * @param fieldsToLoad names of the fields to load; passed to
 *                     {@code reader.document(docID, fieldsToLoad)}
 * @return the document, or {@code null} if an IOException occurs (the
 *         exception is silently discarded)
 */
public static Document findDocumentByDocId(IndexReader reader,int docID, Set<String> fieldsToLoad) {
try {
return reader.document(docID, fieldsToLoad);
} catch (IOException e) {
return null;
}
}

/**
 * Loads a document by its document ID. Delegates to
 * {@link #findDocumentByDocId(IndexReader, int, Set)} with a null field set.
 *
 * @param reader reader to load the document from
 * @param docID  document ID
 * @return the document, or {@code null} if the delegate returns null
 */
public static Document findDocumentByDocId(IndexReader reader,int docID) {
return findDocumentByDocId(reader, docID, null);
}

/**
 * Builds a Highlighter for the given query.
 *
 * @param query            query whose terms should be highlighted
 * @param prefix           markup inserted before a highlighted term; when
 *                         null or blank, {@code <font color="red">} is used
 * @param stuffix          markup inserted after a highlighted term; when
 *                         null or blank, {@code </font>} is used
 * @param fragmenterLength fragment size handed to SimpleFragmenter; values
 *                         less than or equal to 0 fall back to 50
 * @return a Highlighter configured with the formatter, scorer and fragmenter
 */
public static Highlighter createHighlighter(Query query, String prefix, String stuffix, int fragmenterLength) {
    String openTag = prefix;
    if (openTag == null || openTag.trim().length() == 0) {
        openTag = "<font color=\"red\">";
    }
    String closeTag = stuffix;
    if (closeTag == null || closeTag.trim().length() == 0) {
        closeTag = "</font>";
    }
    int fragmentSize = fragmenterLength > 0 ? fragmenterLength : 50;

    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(openTag, closeTag), new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(fragmentSize));
    return highlighter;
}

/**
 * Produces highlighted text for one field of a document.
 * Only the first field whose name equals {@code field} is considered.
 *
 * @param document    document containing the field
 * @param highlighter highlighter used to build the fragment
 * @param analyzer    analyzer passed to the highlighter
 * @param field       name of the field to highlight
 * @return the best highlighted fragment; the field's plain string value
 *         when the fragment is null or blank or when the highlighter
 *         reports invalid token offsets; {@code null} when the document
 *         has no field with that name
 * @throws IOException if the highlighter fails with an I/O error
 */
public static String highlight(Document document,Highlighter highlighter,Analyzer analyzer,String field) throws IOException {
    for (IndexableField candidate : document.getFields()) {
        String plainText = candidate.stringValue();
        if (!candidate.name().equals(field)) {
            continue;
        }
        String fragment;
        try {
            fragment = highlighter.getBestFragment(analyzer, field, plainText);
        } catch (InvalidTokenOffsetsException e) {
            fragment = candidate.stringValue();
        }
        boolean nothingHighlighted = fragment == null || fragment.trim().length() == 0;
        if (nothingHighlighted) {
            return candidate.stringValue();
        }
        return fragment;
    }
    return null;
}

/**
 * Counts the documents matching a query.
 *
 * @param search searcher to run the query on
 * @param query  query to count matches for
 * @return the total number of hits; 0 when nothing matches or when the
 *         search fails with an IOException (its stack trace is printed)
 */
public static int searchTotalRecord(IndexSearcher search,Query query) {
    try {
        // Only the hit count is needed, so ask for a single top hit and
        // read totalHits instead of materialising every ScoreDoc.
        TopDocs topDocs = search.search(query, 1);
        return topDocs == null ? 0 : topDocs.totalHits;
    } catch (IOException e) {
        // Previously execution fell through and dereferenced a null array.
        e.printStackTrace();
        return 0;
    }
}

/**
 * Runs a paged query and stores the result in {@code page}.
 * Sets the page's total record count, its items, and (when the page is
 * not empty) the last hit as the "after" marker for the next page.
 * The searcher's IndexReader is closed before this method returns.
 * IOExceptions are not propagated: their stack traces are printed, a
 * failed search yields an empty page, and a document that cannot be
 * loaded is left out of the items.
 *
 * @param searcher  searcher to run the query on
 * @param directory index directory (not used by this overload)
 * @param query     query to execute
 * @param page      paging state to read from and write the result to
 */
public static void pageQuery(IndexSearcher searcher,Directory directory,Query query,Page<Document> page) {
    try {
        page.setTotalRecord(searchTotalRecord(searcher, query));

        ScoreDoc[] docs = new ScoreDoc[0];
        try {
            TopDocs topDocs = searcher.searchAfter(page.getAfterDoc(), query, page.getPageSize());
            if (null != topDocs && null != topDocs.scoreDocs) {
                docs = topDocs.scoreDocs;
            }
        } catch (IOException e) {
            // Keep the empty hit array instead of dereferencing a null TopDocs.
            e.printStackTrace();
        }

        List<Document> docList = new ArrayList<Document>(docs.length);
        for (ScoreDoc scoreDoc : docs) {
            try {
                docList.add(searcher.doc(scoreDoc.doc));
            } catch (IOException e) {
                // Skip the unreadable document rather than adding null.
                e.printStackTrace();
            }
        }
        if (docs.length > 0) {
            // Remember the last hit so the next call continues after it.
            ScoreDoc last = docs[docs.length - 1];
            page.setAfterDoc(last);
            page.setAfterDocId(last.doc);
        }
        page.setItems(docList);
    } finally {
        closeIndexReader(searcher.getIndexReader());
    }
}

/**
 * Runs a paged query with optional highlighting and stores the result in
 * {@code page}.
 * Without highlighting this delegates to the four-argument overload.
 * With highlighting, each hit whose highlight field has non-blank content
 * is highlighted; when the highlighted text differs from the stored
 * content, a copy of the document carrying the highlighted text is
 * written back to the index and returned in place of the original.
 * The searcher's IndexReader and the IndexWriter obtained here are closed
 * before this method returns, also when an exception is thrown.
 *
 * @param searcher         searcher to run the query on
 * @param directory        index directory used to obtain the writer
 * @param query            query to execute
 * @param page             paging state to read from and write the result to
 * @param highlighterParam highlight settings; null or not enabled means
 *                         no highlighting
 * @param writerConfig     configuration used to obtain the writer
 * @throws IOException if searching, loading or highlighting a document fails
 */
public static void pageQuery(IndexSearcher searcher,Directory directory,Query query,Page<Document> page,HighlighterParam highlighterParam,IndexWriterConfig writerConfig) throws IOException {
    IndexWriter writer = null;
    try {
        // No highlighting requested: plain paged query.
        if (null == highlighterParam || !highlighterParam.isHighlight()) {
            pageQuery(searcher, directory, query, page);
            return;
        }

        int totalRecord = searchTotalRecord(searcher, query);
        System.out.println("totalRecord:" + totalRecord);
        page.setTotalRecord(totalRecord);

        TopDocs topDocs = searcher.searchAfter(page.getAfterDoc(), query, page.getPageSize());
        ScoreDoc[] docs = topDocs.scoreDocs;
        List<Document> docList = new ArrayList<Document>(docs.length);
        String fieldName = highlighterParam.getFieldName();
        writer = getIndexWrtier(directory, writerConfig);
        // The highlighter depends only on the query and the settings, so it
        // is built once (on first need) instead of once per hit.
        Highlighter highlighter = null;

        for (ScoreDoc scoreDoc : docs) {
            Document document = searcher.doc(scoreDoc.doc);
            String content = document.get(fieldName);
            if (null != content && content.trim().length() > 0) {
                if (null == highlighter) {
                    highlighter = LuceneUtils.createHighlighter(query,
                            highlighterParam.getPrefix(), highlighterParam.getStuffix(),
                            highlighterParam.getFragmenterLength());
                }
                String text = highlight(document, highlighter, analyzer, fieldName);
                // A result different from the stored text means highlighting took effect.
                if (null != text && !text.equals(content)) {
                    Document tempdocument = new Document();
                    for (IndexableField field : document.getFields()) {
                        if (field.name().equals(fieldName)) {
                            tempdocument.add(new TextField(field.name(), text, Field.Store.YES));
                        } else {
                            tempdocument.add(field);
                        }
                    }
                    // The writer may be null when the manager failed to create it;
                    // the highlighted copy is still returned in that case.
                    if (null != writer) {
                        updateIndex(writer, new Term(fieldName, content), tempdocument);
                    }
                    document = tempdocument;
                }
            }
            docList.add(document);
        }
        if (docs.length > 0) {
            // Remember the last hit so the next call continues after it.
            ScoreDoc last = docs[docs.length - 1];
            page.setAfterDoc(last);
            page.setAfterDocId(last.doc);
        }
        page.setItems(docList);
    } finally {
        // Release resources on every exit path, including exceptions.
        closeIndexReader(searcher.getIndexReader());
        closeIndexWriter(writer);
    }
}
}
package com.yida.framework.lucene5.util;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.ScoreDoc;
public class Page<T> {
/**當前第幾頁(從1開始計算)*/
private int currentPage;
/**每頁顯示幾條*/
private int pageSize;
/**總記錄數*/
private int totalRecord;
/**總頁數*/
private int totalPage;
/**分頁資料集合[用泛型T來限定集合元素型別]*/
private Collection<T> items;
/**當前顯示起始索引(從零開始計算)*/
private int startIndex;
/**當前顯示結束索引(從零開始計算)*/
private int endIndex;
/**一組最多顯示幾個頁碼[比如Google一組最多顯示10個頁碼]*/
private int groupSize;
/**左邊偏移量*/
private int leftOffset = 5;
/**右邊偏移量*/
private int rightOffset = 4;
/**當前頁碼範圍*/
private String[] pageRange;
/**分頁資料*/
private List<Document> docList;
/**上一頁最後一個ScoreDoc物件*/
private ScoreDoc afterDoc;
/**上一頁最後一個ScoreDoc物件的Document物件ID*/
private int afterDocId;
/**
 * Recomputes {@code startIndex} and {@code endIndex} (zero-based page
 * indexes) of the page-number window to display.
 * The window holds at most {@code groupSize} page numbers and is placed
 * around the current page using offsets derived from the group size.
 */
public void setRangeIndex() {
    int groupSize = getGroupSize();
    int totalPage = getTotalPage();
    if (totalPage < 2) {
        // Kept as in the original: endIndex equals totalPage here;
        // getPageRange() handles the single-page case itself.
        startIndex = 0;
        endIndex = totalPage;
        return;
    }
    // Also clamps the stored current page into the valid range.
    int currentPage = getCurrentPage();
    if (groupSize >= totalPage) {
        // Every page fits into one group.
        startIndex = 0;
        endIndex = totalPage - 1;
        return;
    }
    int leftOffset = getLeftOffset();
    // Derive the right offset from the group size too. Reading the raw
    // field (default 4) made the window wider than groupSize whenever
    // groupSize differed from 10.
    int rightOffset = getRightOffset();
    if (currentPage <= leftOffset) {
        // Near the beginning: show the first group.
        startIndex = 0;
        endIndex = groupSize - 1;
        return;
    }
    startIndex = currentPage - leftOffset - 1;
    // Never run past the last page.
    endIndex = Math.min(currentPage + rightOffset, totalPage) - 1;
}
/**
 * Returns the current page number (1-based), normalising the stored value
 * first: values less than or equal to 0 become 1, and values beyond a
 * positive total page count become that total.
 *
 * @return the normalised current page number
 */
public int getCurrentPage() {
    if (currentPage <= 0) {
        currentPage = 1;
        return currentPage;
    }
    int pages = getTotalPage();
    if (pages > 0 && currentPage > pages) {
        currentPage = pages;
    }
    return currentPage;
}
/**
 * Sets the current page number; the value is stored as given.
 *
 * @param currentPage page number to store
 */
public void setCurrentPage(int currentPage) {
this.currentPage = currentPage;
}
/**
 * Returns the number of items per page. A stored value less than or equal
 * to 0 is replaced by 10 before it is returned.
 *
 * @return the page size, always positive
 */
public int getPageSize() {
    pageSize = pageSize > 0 ? pageSize : 10;
    return pageSize;
}
/**
 * Sets the number of items per page; the value is stored as given.
 *
 * @param pageSize page size to store
 */
public void setPageSize(int pageSize) {
this.pageSize = pageSize;
}
/**
 * Returns the stored total record count.
 *
 * @return the total number of records
 */
public int getTotalRecord() {
return totalRecord;
}
/**
 * Sets the total record count.
 *
 * @param totalRecord total number of records
 */
public void setTotalRecord(int totalRecord) {
this.totalRecord = totalRecord;
}
/**
 * Computes the total page count from the total record count and the page
 * size, stores it and returns it. Zero records give zero pages; otherwise
 * a partially filled last page counts as a page.
 *
 * @return the total number of pages
 */
public int getTotalPage() {
    int records = getTotalRecord();
    if (records == 0) {
        totalPage = 0;
        return totalPage;
    }
    int perPage = getPageSize();
    int fullPages = records / perPage;
    boolean hasRemainder = records % perPage != 0;
    totalPage = hasRemainder ? fullPages + 1 : fullPages;
    return totalPage;
}
/**
 * Stores a total page count. Note that {@code getTotalPage()} recomputes
 * and overwrites this field on every call.
 *
 * @param totalPage total page count to store
 */
public void setTotalPage(int totalPage) {
this.totalPage = totalPage;
}
/**
 * Returns the stored start index of the displayed page-number window.
 *
 * @return the start index
 */
public int getStartIndex() {
return startIndex;
}
/**
 * Sets the start index of the displayed page-number window.
 *
 * @param startIndex start index to store
 */
public void setStartIndex(int startIndex) {
this.startIndex = startIndex;
}
/**
 * Returns the stored end index of the displayed page-number window.
 *
 * @return the end index
 */
public int getEndIndex() {
return endIndex;
}
/**
 * Sets the end index of the displayed page-number window.
 *
 * @param endIndex end index to store
 */
public void setEndIndex(int endIndex) {
this.endIndex = endIndex;
}
/**
 * Returns the maximum number of page numbers shown in one group. A stored
 * value less than or equal to 0 is replaced by 10 before it is returned.
 *
 * @return the group size, always positive
 */
public int getGroupSize() {
    groupSize = groupSize > 0 ? groupSize : 10;
    return groupSize;
}
/**
 * Sets the maximum number of page numbers shown in one group.
 *
 * @param groupSize group size to store
 */
public void setGroupSize(int groupSize) {
this.groupSize = groupSize;
}
/**
 * Recomputes the left offset as half the group size (integer division),
 * stores it and returns it. Any previously stored value is overwritten.
 *
 * @return {@code getGroupSize() / 2}
 */
public int getLeftOffset() {
leftOffset = getGroupSize() / 2;
return leftOffset;
}
/**
 * Stores a left offset. Note that {@code getLeftOffset()} recomputes and
 * overwrites this field on every call.
 *
 * @param leftOffset left offset to store
 */
public void setLeftOffset(int leftOffset) {
this.leftOffset = leftOffset;
}
/**
 * Recomputes the right offset from the group size, stores it and returns
 * it: half the group size for an odd size, one less than half for an even
 * size. Any previously stored value is overwritten.
 *
 * @return the right offset derived from the group size
 */
public int getRightOffset() {
    // getGroupSize() is always positive, so (size - 1) / 2 yields
    // size/2 - 1 for even sizes and size/2 for odd sizes.
    int size = getGroupSize();
    rightOffset = (size - 1) / 2;
    return rightOffset;
}
/**
 * Stores a right offset. Note that {@code getRightOffset()} recomputes and
 * overwrites this field on every call.
 *
 * @param rightOffset right offset to store
 */
public void setRightOffset(int rightOffset) {
this.rightOffset = rightOffset;
}
/**
 * Returns the position of the centre page number within a group
 * (1-based), or -1 when all pages fit into a single group.
 *
 * @return {@code getLeftOffset() + 1}, or -1 if the group size is greater
 *         than or equal to the total page count
 */
public int getMiddleOffset() {
    int size = getGroupSize();
    int pages = getTotalPage();
    boolean needsWindow = size < pages;
    return needsWindow ? getLeftOffset() + 1 : -1;
}
/**
 * Recomputes the page-number window and returns its page numbers (1-based)
 * as strings.
 *
 * @return an empty array when the window is empty, {@code {"1"}} when
 *         there is exactly one page, otherwise the numbers from
 *         {@code startIndex + 1} to {@code endIndex + 1}
 */
public String[] getPageRange() {
    setRangeIndex();
    int count = endIndex - startIndex + 1;
    if (count <= 0) {
        return new String[0];
    }
    if (totalPage == 1) {
        return new String[] {"1"};
    }
    String[] labels = new String[count];
    int pageNo = startIndex + 1;
    for (int slot = 0; slot < count; slot++) {
        labels[slot] = String.valueOf(pageNo);
        pageNo++;
    }
    pageRange = labels;
    return pageRange;
}
/**
 * Stores a page-number range.
 *
 * @param pageRange page numbers to store
 */
public void setPageRange(String[] pageRange) {
this.pageRange = pageRange;
}
/**
 * Returns the items of the current page.
 *
 * @return the stored item collection
 */
public Collection<T> getItems() {
return items;
}
/**
 * Sets the items of the current page.
 *
 * @param items item collection to store
 */
public void setItems(Collection<T> items) {
this.items = items;
}
/**
 * Returns the stored document list.
 *
 * @return the document list
 */
public List<Document> getDocList() {
return docList;
}
/**
 * Sets the document list.
 *
 * @param docList document list to store
 */
public void setDocList(List<Document> docList) {
this.docList = docList;
}
/**
 * Returns the last ScoreDoc of the previous page, used as the "search
 * after" marker for the next page.
 * When no ScoreDoc is stored but a positive {@code afterDocId} is, a
 * ScoreDoc with that ID and score 1.0 is created and stored. On a fresh
 * page (no marker, ID 0) this returns {@code null} so that a search starts
 * from the first hit; previously a ScoreDoc(0, 1.0f) was fabricated here.
 *
 * @return the stored or derived marker, or {@code null} if there is none
 */
public ScoreDoc getAfterDoc() {
    if (null == afterDoc && afterDocId > 0) {
        afterDoc = new ScoreDoc(afterDocId, 1.0f);
    }
    return afterDoc;
}
/**
 * Stores the last ScoreDoc of the page just read.
 *
 * @param afterDoc ScoreDoc to store
 */
public void setAfterDoc(ScoreDoc afterDoc) {
this.afterDoc = afterDoc;
}
/**
 * Returns the stored document ID of the previous page's last hit.
 *
 * @return the stored document ID
 */
public int getAfterDocId() {
return afterDocId;
}
/**
 * Stores the document ID of the previous page's last hit. When no
 * ScoreDoc is stored yet, one is created from this ID with score 1.0.
 *
 * @param afterDocId document ID to store
 */
public void setAfterDocId(int afterDocId) {
this.afterDocId = afterDocId;
// Only fill in a ScoreDoc when none was set explicitly.
if(null == afterDoc) {
this.afterDoc = new ScoreDoc(afterDocId, 1.0f);
}
}
/** Creates a page with all fields at their declared defaults. */
public Page() {}
/**
 * Creates a page with the given current page number and page size.
 *
 * @param currentPage current page number
 * @param pageSize    number of items per page
 */
public Page(int currentPage, int pageSize) {
this.currentPage = currentPage;
this.pageSize = pageSize;
}
/**
 * Creates a page with the given current page number, page size and items.
 *
 * @param currentPage current page number
 * @param pageSize    number of items per page
 * @param items       items of the page
 */
public Page(int currentPage, int pageSize, Collection<T> items) {
this.currentPage = currentPage;
this.pageSize = pageSize;
this.items = items;
}
/**
 * Creates a page with the given current page number, page size, items and
 * group size.
 *
 * @param currentPage current page number
 * @param pageSize    number of items per page
 * @param items       items of the page
 * @param groupSize   maximum number of page numbers shown in one group
 */
public Page(int currentPage, int pageSize, Collection<T> items, int groupSize) {
this.currentPage = currentPage;
this.pageSize = pageSize;
this.items = items;
this.groupSize = groupSize;
}
public Page(int currentPage, int