Lucene分組查詢的簡單使用
阿新 • • 發佈:2018-12-10
網上介紹的Lucene分組查詢的過程大多比較複雜,這裡提供一個較為簡單的實現,可以滿足基本的分組查詢需求。
1.首先引入依賴
<!-- Grouped queries: Lucene grouping module -->
<!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-grouping -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-grouping</artifactId >
<version>7.2.1</version>
</dependency>
2.建立索引
/**
 * Adds seven sample documents to the index, then commits and closes the writer.
 *
 * <p>Documents 0-5 carry a group value ("author1", "author2" or "author3")
 * under {@code groupField}. Document 6 is deliberately written without the
 * group field so that the index also contains a document that belongs to no
 * named group.
 *
 * @param groupField name of the field the documents are grouped on
 * @param writer     index writer receiving the documents; it is committed and
 *                   closed before this method returns
 * @throws IOException if adding a document, committing or closing fails
 */
public static void addDocuments(String groupField, IndexWriter writer)
        throws IOException {
    // 0
    addSampleDocument(writer, groupField, true, "author1", "random text", "1");
    // 1
    addSampleDocument(writer, groupField, true, "author1",
            "some more random text", "2");
    // 2
    addSampleDocument(writer, groupField, true, "author1",
            "some more random textual data", "3");
    // 3
    addSampleDocument(writer, groupField, true, "author2",
            "some random text", "4");
    // 4
    addSampleDocument(writer, groupField, true, "author3",
            "some more random text", "5");
    // 5
    addSampleDocument(writer, groupField, true, "author3", "random", "6");
    // 6 -- no group field (the stored "author" field is still present).
    // The id is "7": the original reused "6", colliding with document 5.
    addSampleDocument(writer, groupField, false, "author4",
            "random word stuck in alot of other text", "7");

    writer.commit();
    writer.close();
}

/**
 * Builds one sample document and hands it to the writer.
 *
 * @param writer     index writer receiving the document
 * @param groupField name of the grouping field
 * @param grouped    whether to add the grouping field (valued with {@code author})
 * @param author     value of the stored "author" field
 * @param content    value of the stored, analyzed "content" field
 * @param id         value of the stored "id" field
 * @throws IOException if the writer fails to add the document
 */
private static void addSampleDocument(IndexWriter writer, String groupField,
        boolean grouped, String author, String content, String id)
        throws IOException {
    Document doc = new Document();
    if (grouped) {
        // Group field goes first, matching the original field order.
        addGroupField(doc, groupField, author);
    }
    doc.add(new StringField("author", author, Field.Store.YES));
    doc.add(new TextField("content", content, Field.Store.YES));
    doc.add(new StringField("id", id, Field.Store.YES));
    writer.addDocument(doc);
}
/**
 * Adds the grouping field to a document.
 *
 * @param doc        document being indexed
 * @param groupField name of the field to group on
 * @param value      value of that field for this document
 */
private static void addGroupField(Document doc, String groupField,
        String value) {
    // The field used for grouping must be indexed as a SortedDocValuesField.
    BytesRef groupValue = new BytesRef(value);
    SortedDocValuesField groupingField =
            new SortedDocValuesField(groupField, groupValue);
    doc.add(groupingField);
}
3.對查詢結果分組,一些坑以及要點已在註釋中說明得很清楚了
/**
 * Runs a grouped search on Lucene 7 and prints the hits group by group.
 *
 * <p>Parses the query "some content" against the "content" field, groups the
 * matching documents by {@code groupField}, and prints the total hit count,
 * the number of groups returned, and the author/content of each document in
 * each group.
 *
 * @throws Exception if the query cannot be parsed or the index cannot be read
 */
@Test
public void lucene7GroupBy() throws Exception {
    // Field whose values define the groups.
    GroupingSearch groupingSearch = new GroupingSearch(groupField);
    // Order the groups by relevance score.
    groupingSearch.setGroupSort(new Sort(SortField.FIELD_SCORE));
    // Also fill in the sort values of the returned groups.
    groupingSearch.setFillSortFields(true);
    groupingSearch.setCachingInMB(4.0, true);
    groupingSearch.setAllGroups(true);
    //groupingSearch.setAllGroupHeads(true);
    // Maximum number of documents returned per group (not the number of groups).
    groupingSearch.setGroupDocsLimit(10);

    String queryExpression = "some content";

    // Analyzer, directory and reader are all closed when the block exits,
    // including when parsing or searching throws.
    try (Analyzer analyzer = new StandardAnalyzer();
         Directory directory = FSDirectory.open(Paths.get(indexDir));
         IndexReader reader = DirectoryReader.open(directory)) {
        QueryParser parser = new QueryParser("content", analyzer);
        Query query = parser.parse(queryExpression);
        IndexSearcher searcher = new IndexSearcher(reader);

        // Search the "content" field for the terms "some" / "content" (the
        // parser's default operator is OR, so either term matches) and group
        // the hits by the grouping field: group offset 0, at most 1000 groups.
        TopGroups<BytesRef> result = groupingSearch.search(searcher, query, 0, 1000);

        // Total number of hits
        System.out.println("總命中數:" + result.totalHitCount);
        // Number of groups returned
        System.out.println("分組數:" + result.groups.length);

        // Print the hits group by group
        for (GroupDocs<BytesRef> groupDocs : result.groups) {
            if (groupDocs == null) {
                continue;
            }
            if (groupDocs.groupValue != null) {
                System.out.println("分組:" + groupDocs.groupValue.utf8ToString());
            } else {
                // One indexed document has no SortedDocValues entry for the
                // grouping field, so its group's groupValue is null.
                System.out.println("分組:" + "unknow");
            }
            System.out.println("組內資料條數:" + groupDocs.totalHits);
            for (ScoreDoc scoreDoc : groupDocs.scoreDocs) {
                System.out.println("author:" + searcher.doc(scoreDoc.doc).get("author"));
                System.out.println("content:" + searcher.doc(scoreDoc.doc).get("content"));
                System.out.println();
            }
            System.out.println("=====================================");
        }
    }
}
完整程式碼可以參考我的github:github