1. 程式人生 > >lucene學習之針對多索引的搜尋

lucene學習之針對多索引的搜尋

在實際應用中,很多應用程式保持多個分離的Lucene索引,但又需要在搜尋過程中能夠將結果合併輸出,比如新聞網站每天都會建立不同索引,但是搜尋一個月的新聞時就需要合併輸出結果。這時可以使用如下方式:

// Wrap the readers of the separate indexes in one MultiReader and search through it.
mreader = new MultiReader(readera,readern);
        searcher = new IndexSearcher(mreader);// since 4.0 this is what replaces MultiSearcher

可以看到我們需要使用MultiReader這個類,將讀不同索引的reader封裝在一塊。
下面是實現程式碼

import java.io.IOException
; import junit.framework.TestCase; import org.apache.lucene.analysis.*; import org.apache.lucene.analysis.core.*; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.StringField; import org.apache.lucene.index.DirectoryReader; import org.apache
.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.MultiReader; import org.apache.lucene.search.*; import org.apache.lucene.store.*; public class MultiSearcherTest extends TestCase{ private static IndexSearcher searcher;
static IndexReader readera; static IndexReader readern; static MultiReader mreader; public static void testMulti() throws IOException{ String[] animals = { "aardvark","beaver","coati","dog", "elephant","frog","gila monster", "horse","iguana","javelina","kangroo", "lemur","moose","rematode","orca", "python","quoka","rat","scorpion", "tarantula","uromastyx","vicuna", "walrus","xiphias","yak","zebra" }; Analyzer analyzer = new WhitespaceAnalyzer(); Directory aTOmDirectory = new RAMDirectory();//建立兩個目錄 Directory nTOzDirectory = new RAMDirectory(); IndexWriterConfig configa = new IndexWriterConfig(analyzer); IndexWriterConfig confign = new IndexWriterConfig(analyzer); IndexWriter aTOmWriter = new IndexWriter(aTOmDirectory,configa); IndexWriter nTOzWriter = new IndexWriter(nTOzDirectory,confign); for(int i = animals.length - 1;i >= 0;i--){ Document doc = new Document(); String animal = animals[i]; doc.add(new StringField("animal",animal,Field.Store.YES)); if(animal.charAt(0) < 'n'){ aTOmWriter.addDocument(doc); }else{ nTOzWriter.addDocument(doc); } } readera = DirectoryReader.open(aTOmWriter,true); readern = DirectoryReader.open(nTOzWriter,true); aTOmWriter.close(); nTOzWriter.close(); mreader = new MultiReader(readera,readern); searcher = new IndexSearcher(mreader);//4.0以後的MultiSearcher替換成這樣 TermRangeQuery query = TermRangeQuery.newStringRange("animal","h","t",true,true); TopDocs hits = searcher.search(query, 10); System.out.println("一共搜尋到結果:"+hits.totalHits+"條"); for(ScoreDoc s:hits.scoreDocs){ Document doc = searcher.doc(s.doc);//通過序號得到檔案 System.out.println("序號為"+s.doc+" "+"animal:"+doc.get("animal")); } assertEquals("tarantula not included",12,hits.totalHits); } public static void main(String args[]) throws IOException{ testMulti(); } }

TermRangeQuery類查詢詞項落在"h"到"t"之間(含邊界)的動物名稱,因此以t開頭的tarantula(大於"t")不包含在內;匹配的文件來自於兩個不同的索引。結果如下:
執行結果
當然上面程式是一個searcher單執行緒操作,也可以使用多執行緒的辦法,例如在建立IndexSearcher時傳入ExecutorService;lucene5中還提供了ParallelLeafReader類。