1. 程式人生 > >LuceneUtils工具類簡單封裝

LuceneUtils工具類簡單封裝

週六花了整整一下午,將Lucene5中有關索引的常見操作進行了簡單封裝,廢話不多說,上程式碼:

package com.yida.framework.lucene5.util;

import java.io.IOException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.lucene.index.DirectoryReader;
import
org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.store.Directory; import org.apache.lucene.store.LockObtainFailedException; /** * Lucene索引讀寫器/查詢器單例獲取工具類 * @author
Lanxiaowei * */
public class LuceneManager { private volatile static LuceneManager singleton; private volatile static IndexWriter writer; private volatile static IndexReader reader; private volatile static IndexSearcher searcher; private final Lock writerLock = new ReentrantLock(); //private final Lock readerLock = new ReentrantLock();
//private final Lock searcherLock = new ReentrantLock(); private LuceneManager() {} public static LuceneManager getInstance() { if (null == singleton) { synchronized (LuceneManager.class) { if (null == singleton) { singleton = new LuceneManager(); } } } return singleton; } /** * 獲取IndexWriter單例物件 * @param dir * @param config * @return */ public IndexWriter getIndexWriter(Directory dir, IndexWriterConfig config) { if(null == dir) { throw new IllegalArgumentException("Directory can not be null."); } if(null == config) { throw new IllegalArgumentException("IndexWriterConfig can not be null."); } try { writerLock.lock(); if(null == writer){ //如果索引目錄被鎖,則直接拋異常 if(IndexWriter.isLocked(dir)) { throw new LockObtainFailedException("Directory of index had been locked."); } writer = new IndexWriter(dir, config); } } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { writerLock.unlock(); } return writer; } /** * 獲取IndexReader物件 * @param dir * @param enableNRTReader 是否開啟NRTReader * @return */ public IndexReader getIndexReader(Directory dir,boolean enableNRTReader) { if(null == dir) { throw new IllegalArgumentException("Directory can not be null."); } try { if(null == reader){ reader = DirectoryReader.open(dir); } else { if(enableNRTReader && reader instanceof DirectoryReader) { //開啟近實時Reader,能立即看到動態新增/刪除的索引變化 reader = DirectoryReader.openIfChanged((DirectoryReader)reader); } } } catch (IOException e) { e.printStackTrace(); } return reader; } /** * 獲取IndexReader物件(預設不啟用NETReader) * @param dir * @return */ public IndexReader getIndexReader(Directory dir) { return getIndexReader(dir, false); } /** * 獲取IndexSearcher物件 * @param reader IndexReader物件例項 * @param executor 如果你需要開啟多執行緒查詢,請提供ExecutorService物件引數 * @return */ public IndexSearcher getIndexSearcher(IndexReader reader,ExecutorService executor) { if(null == reader) { throw new IllegalArgumentException("The indexReader can not be null."); } if(null == searcher){ searcher = new IndexSearcher(reader); } return searcher; } /** * 獲取IndexSearcher物件(不支援多執行緒查詢) * @param reader IndexReader物件例項 * @return */ public IndexSearcher getIndexSearcher(IndexReader reader) { return getIndexSearcher(reader, null); } }
package com.yida.framework.lucene5.util;

import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ExecutorService;

import org.ansj.lucene5.AnsjAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Lucene工具類(基於Lucene5.0封裝)
 * @author Lanxiaowei
 *
 */
public class LuceneUtils {
	private static final LuceneManager luceneManager = LuceneManager.getInstance();
	private static Analyzer analyzer = new AnsjAnalyzer();
	
	/**
	 * 開啟索引目錄
	 * 
	 * @param luceneDir
	 * @return
	 * @throws IOException
	 */
	public static FSDirectory openFSDirectory(String luceneDir) {
		FSDirectory directory = null;
		try {
			directory = FSDirectory.open(Paths.get(luceneDir));
			/**
			 * 注意:isLocked方法內部會試圖去獲取Lock,如果獲取到Lock,會關閉它,否則return false表示索引目錄沒有被鎖,
			 * 這也就是為什麼unlock方法被從IndexWriter類中移除的原因
			 */
			IndexWriter.isLocked(directory);
		} catch (IOException e) {
			e.printStackTrace();
		}
		return directory;
	}
	
	/**
	 * 關閉索引目錄並銷燬
	 * @param directory
	 * @throws IOException
	 */
	public static void closeDirectory(Directory directory) throws IOException {
		if (null != directory) {
			directory.close();
			directory = null;
		}
	}
	
	/**
	 * 獲取IndexWriter
	 * @param dir
	 * @param config
	 * @return
	 */
	public static IndexWriter getIndexWrtier(Directory dir, IndexWriterConfig config) {
		return luceneManager.getIndexWriter(dir, config);
	}
	
	/**
	 * 獲取IndexWriter
	 * @param dir
	 * @param config
	 * @return
	 */
	public static IndexWriter getIndexWrtier(String directoryPath, IndexWriterConfig config) {
		FSDirectory directory = openFSDirectory(directoryPath);
		return luceneManager.getIndexWriter(directory, config);
	}
	
	/**
	 * 獲取IndexReader
	 * @param dir
	 * @param enableNRTReader  是否開啟NRTReader
	 * @return
	 */
	public static IndexReader getIndexReader(Directory dir,boolean enableNRTReader) {
		return luceneManager.getIndexReader(dir, enableNRTReader);
	}
	
	/**
	 * 獲取IndexReader(預設不啟用NRTReader)
	 * @param dir
	 * @return
	 */
	public static IndexReader getIndexReader(Directory dir) {
		return luceneManager.getIndexReader(dir);
	}
	
	/**
	 * 獲取IndexSearcher
	 * @param reader    IndexReader物件
	 * @param executor  如果你需要開啟多執行緒查詢,請提供ExecutorService物件引數
	 * @return
	 */
	public static IndexSearcher getIndexSearcher(IndexReader reader,ExecutorService executor) {
		return luceneManager.getIndexSearcher(reader, executor);
	}
	
	/**
	 * 獲取IndexSearcher(不支援多執行緒查詢)
	 * @param reader    IndexReader物件
	 * @return
	 */
	public static IndexSearcher getIndexSearcher(IndexReader reader) {
		return luceneManager.getIndexSearcher(reader);
	}
	
	/**
	 * 建立QueryParser物件
	 * @param field
	 * @param analyzer
	 * @return
	 */
	public static QueryParser createQueryParser(String field, Analyzer analyzer) {
		return new QueryParser(field, analyzer);
	}
	
	/**
	 * 關閉IndexReader
	 * @param reader
	 */
	public static void closeIndexReader(IndexReader reader) {
		if (null != reader) {
			try {
				reader.close();
				reader = null;
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
	}
	
	/**
	 * 關閉IndexWriter
	 * @param writer
	 */
	public static void closeIndexWriter(IndexWriter writer) {
		if(null != writer) {
			try {
				writer.close();
				writer = null;
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
	}
	
	/**
	 * 關閉IndexReader和IndexWriter
	 * @param reader
	 * @param writer
	 */
	public static void closeAll(IndexReader reader, IndexWriter writer) {
		closeIndexReader(reader);
		closeIndexWriter(writer);
	}
	
	/**
	 * 刪除索引[注意:請自己關閉IndexWriter物件]
	 * @param writer
	 * @param field
	 * @param value
	 */
	public static void deleteIndex(IndexWriter writer, String field, String value) {
		try {
			writer.deleteDocuments(new Term[] {new Term(field,value)});
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	/**
	 * 刪除索引[注意:請自己關閉IndexWriter物件]
	 * @param writer
	 * @param query
	 */
	public static void deleteIndex(IndexWriter writer, Query query) {
		try {
			writer.deleteDocuments(query);
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	/**
	 * 批量刪除索引[注意:請自己關閉IndexWriter物件]
	 * @param writer
	 * @param terms
	 */
	public static void deleteIndexs(IndexWriter writer,Term[] terms) {
		try {
			writer.deleteDocuments(terms);
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	/**
	 * 批量刪除索引[注意:請自己關閉IndexWriter物件]
	 * @param writer
	 * @param querys
	 */
	public static void deleteIndexs(IndexWriter writer,Query[] querys) {
		try {
			writer.deleteDocuments(querys);
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	/**
	 * 刪除所有索引文件
	 * @param writer
	 */
	public static void deleteAllIndex(IndexWriter writer) {
		try {
			writer.deleteAll();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	/**
	 * 更新索引文件
	 * @param writer
	 * @param term
	 * @param document
	 */
	public static void updateIndex(IndexWriter writer,Term term,Document document) {
		try {
			writer.updateDocument(term, document);
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	/**
	 * 更新索引文件
	 * @param writer
	 * @param term
	 * @param document
	 */
	public static void updateIndex(IndexWriter writer,String field,String value,Document document) {
		updateIndex(writer, new Term(field, value), document);
	}
	
	/**
	 * 新增索引文件
	 * @param writer
	 * @param doc
	 */
	public static void addIndex(IndexWriter writer, Document document) {
		updateIndex(writer, null, document);
	}
	
	/**
	 * 索引文件查詢
	 * @param searcher
	 * @param query
	 * @return
	 */
	public static List<Document> query(IndexSearcher searcher,Query query) {
		TopDocs topDocs = null;
		try {
			topDocs = searcher.search(query, Integer.MAX_VALUE);
		} catch (IOException e) {
			e.printStackTrace();
		}
		ScoreDoc[] scores = topDocs.scoreDocs;
		int length = scores.length;
		if (length <= 0) {
			return Collections.emptyList();
		}
		List<Document> docList = new ArrayList<Document>();
		try {
			for (int i = 0; i < length; i++) {
				Document doc = searcher.doc(scores[i].doc);
				docList.add(doc);
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
		return docList;
	}
	
	/**
	 * 返回索引文件的總數[注意:請自己手動關閉IndexReader]
	 * @param reader
	 * @return
	 */
	public static int getIndexTotalCount(IndexReader reader) {
		return reader.numDocs();
	}
	
	/**
	 * 返回索引文件中最大文件ID[注意:請自己手動關閉IndexReader]
	 * @param reader
	 * @return
	 */
	public static int getMaxDocId(IndexReader reader) {
		return reader.maxDoc();
	}
	
	/**
	 * 返回已經刪除尚未提交的文件總數[注意:請自己手動關閉IndexReader]
	 * @param reader
	 * @return
	 */
	public static int getDeletedDocNum(IndexReader reader) {
		return getMaxDocId(reader) - getIndexTotalCount(reader);
	}
	
	/**
	 * 根據docId查詢索引文件
	 * @param reader         IndexReader物件
	 * @param docID          documentId
	 * @param fieldsToLoad   需要返回的field
	 * @return
	 */
	public static Document findDocumentByDocId(IndexReader reader,int docID, Set<String> fieldsToLoad) {
		try {
			return reader.document(docID, fieldsToLoad);
		} catch (IOException e) {
			return null;
		}
	}
	
	/**
	 * 根據docId查詢索引文件
	 * @param reader         IndexReader物件
	 * @param docID          documentId
	 * @return
	 */
	public static Document findDocumentByDocId(IndexReader reader,int docID) {
		return findDocumentByDocId(reader, docID, null);
	}
	
	/**
	 * @Title: createHighlighter
	 * @Description: 建立高亮器
	 * @param query             索引查詢物件
	 * @param prefix            高亮字首字串
	 * @param stuffix           高亮字尾字串
	 * @param fragmenterLength  摘要最大長度
	 * @return
	 */
	public static Highlighter createHighlighter(Query query, String prefix, String stuffix, int fragmenterLength) {
		Formatter formatter = new SimpleHTMLFormatter((prefix == null || prefix.trim().length() == 0) ? 
			"<font color=\"red\">" : prefix, (stuffix == null || stuffix.trim().length() == 0)?"</font>" : stuffix);
		Scorer fragmentScorer = new QueryScorer(query);
		Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
		Fragmenter fragmenter = new SimpleFragmenter(fragmenterLength <= 0 ? 50 : fragmenterLength);
		highlighter.setTextFragmenter(fragmenter);
		return highlighter;
	}
	
	/**
	 * @Title: highlight
	 * @Description: 生成高亮文字
	 * @param document          索引文件物件
	 * @param highlighter       高亮器
	 * @param analyzer          索引分詞器
	 * @param field             高亮欄位
	 * @return
	 * @throws IOException
	 * @throws InvalidTokenOffsetsException
	 */
	public static String highlight(Document document,Highlighter highlighter,Analyzer analyzer,String field) throws IOException {
		List<IndexableField> list = document.getFields();
		for (IndexableField fieldable : list) {
			String fieldValue = fieldable.stringValue();
			if(fieldable.name().equals(field)) {
				try {
					fieldValue = highlighter.getBestFragment(analyzer, field, fieldValue);
				} catch (InvalidTokenOffsetsException e) {
					fieldValue = fieldable.stringValue();
				}
				return (fieldValue == null || fieldValue.trim().length() == 0)? fieldable.stringValue() : fieldValue;
			}
		}
		return null;
	}
	
	/**
	 * @Title: searchTotalRecord
	 * @Description: 獲取符合條件的總記錄數
	 * @param query
	 * @return
	 * @throws IOException
	 */
	public static int searchTotalRecord(IndexSearcher search,Query query) {
		ScoreDoc[] docs = null;
		try {
			TopDocs topDocs = search.search(query, Integer.MAX_VALUE);
			if(topDocs == null || topDocs.scoreDocs == null || topDocs.scoreDocs.length == 0) {
				return 0;
			}
			docs = topDocs.scoreDocs;
		} catch (IOException e) {
			e.printStackTrace();
		}
		return docs.length;
	}
	
	/**
	 * @Title: pageQuery
	 * @Description: Lucene分頁查詢
	 * @param searcher
	 * @param query
	 * @param page
	 * @throws IOException
	 */
	public static void pageQuery(IndexSearcher searcher,Directory directory,Query query,Page<Document> page) {
		int totalRecord = searchTotalRecord(searcher,query);
		//設定總記錄數
		page.setTotalRecord(totalRecord);
		TopDocs topDocs = null;
		try {
			topDocs = searcher.searchAfter(page.getAfterDoc(),query, page.getPageSize());
		} catch (IOException e) {
			e.printStackTrace();
		}
		List<Document> docList = new ArrayList<Document>();
		ScoreDoc[] docs = topDocs.scoreDocs;
		int index = 0;
		for (ScoreDoc scoreDoc : docs) {
			int docID = scoreDoc.doc;
			Document document = null;
			try {
				document = searcher.doc(docID);
			} catch (IOException e) {
				e.printStackTrace();
			}
			if(index == docs.length - 1) {
				page.setAfterDoc(scoreDoc);
				page.setAfterDocId(docID);
			}
			docList.add(document);
			index++;
		}
		page.setItems(docList);
		closeIndexReader(searcher.getIndexReader());
	}
	
	/**
	 * @Title: pageQuery
	 * @Description: 分頁查詢[如果設定了高亮,則會更新索引文件]
	 * @param searcher
	 * @param directory
	 * @param query
	 * @param page
	 * @param highlighterParam
	 * @param writerConfig
	 * @throws IOException
	 */
	public static void pageQuery(IndexSearcher searcher,Directory directory,Query query,Page<Document> page,HighlighterParam highlighterParam,IndexWriterConfig writerConfig) throws IOException {
		IndexWriter writer = null;
		//若未設定高亮
		if(null == highlighterParam || !highlighterParam.isHighlight()) {
			pageQuery(searcher,directory,query, page);
		} else {
			int totalRecord = searchTotalRecord(searcher,query);
			System.out.println("totalRecord:" + totalRecord);
			//設定總記錄數
			page.setTotalRecord(totalRecord);
			TopDocs topDocs = searcher.searchAfter(page.getAfterDoc(),query, page.getPageSize());
			List<Document> docList = new ArrayList<Document>();
			ScoreDoc[] docs = topDocs.scoreDocs;
			int index = 0;
			writer = getIndexWrtier(directory, writerConfig);
			for (ScoreDoc scoreDoc : docs) {
				int docID = scoreDoc.doc;
				Document document = searcher.doc(docID);
				String content = document.get(highlighterParam.getFieldName());
				if(null != content && content.trim().length() > 0) {
					//建立高亮器
					Highlighter highlighter = LuceneUtils.createHighlighter(query, 
						highlighterParam.getPrefix(), highlighterParam.getStuffix(), 
						highlighterParam.getFragmenterLength());
					String text = highlight(document, highlighter, analyzer, highlighterParam.getFieldName());
					//若高亮後跟原始文字不相同,表示高亮成功
					if(!text.equals(content)) {
						Document tempdocument = new Document();
						List<IndexableField> indexableFieldList = document.getFields();
						if(null != indexableFieldList && indexableFieldList.size() > 0) {
							for(IndexableField field : indexableFieldList) {
								if(field.name().equals(highlighterParam.getFieldName())) {
									tempdocument.add(new TextField(field.name(), text, Field.Store.YES));
								} else {
									tempdocument.add(field);
								}
							}
						}
						updateIndex(writer, new Term(highlighterParam.getFieldName(),content), tempdocument);
						document = tempdocument;
					}
				}
				if(index == docs.length - 1) {
					page.setAfterDoc(scoreDoc);
					page.setAfterDocId(docID);
				}
				docList.add(document);
				index++;
			}
			page.setItems(docList);
		}
		closeIndexReader(searcher.getIndexReader());
		closeIndexWriter(writer);
	}
}
package com.yida.framework.lucene5.util;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.ScoreDoc;
public class Page<T> {
	/**當前第幾頁(從1開始計算)*/
	private int currentPage;
	/**每頁顯示幾條*/
	private int pageSize;
	/**總記錄數*/
	private int totalRecord;
	/**總頁數*/
	private int totalPage;
	/**分頁資料集合[用泛型T來限定集合元素型別]*/
	private Collection<T> items;
	/**當前顯示起始索引(從零開始計算)*/
	private int startIndex;
	/**當前顯示結束索引(從零開始計算)*/
	private int endIndex;
	/**一組最多顯示幾個頁碼[比如Google一組最多顯示10個頁碼]*/
	private int groupSize;
	/**左邊偏移量*/
	private int leftOffset = 5;
	/**右邊偏移量*/
	private int rightOffset = 4;
	/**當前頁碼範圍*/
	private String[] pageRange;
	/**分頁資料*/
	private List<Document> docList;
	/**上一頁最後一個ScoreDoc物件*/
	private ScoreDoc afterDoc;
	/**上一頁最後一個ScoreDoc物件的Document物件ID*/
	private int afterDocId;
	public void setRangeIndex() {
		int groupSize = getGroupSize();
		int totalPage = getTotalPage();
		if(totalPage < 2) {
			startIndex = 0;
			endIndex = totalPage - startIndex;
		} else {
			int currentPage = getCurrentPage();