1. 程式人生 > >lucene分組查詢的簡單使用

lucene分組查詢的簡單使用

網上介紹的Lucene分組查詢的過程大多比較複雜,這裡提供一個較為簡單的實現,可以滿足基本的分組查詢需求。

1.首先引入依賴

    <!--組查詢-->
    <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-grouping -->
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-grouping</artifactId>
      <version>7.2.1</version>
    </dependency>

2.建立索引

 /**
     * 新增索引文件
     *
     * @param groupField
     * @param writer
     * @throws IOException
     */
    public static void addDocuments(String groupField, IndexWriter writer)
            throws IOException {
        // 0
Document doc = new Document(); addGroupField(doc, groupField, "author1"); doc.add(new StringField("author", "author1", Field.Store.YES)); doc.add(new TextField("content", "random text", Field.Store.YES)); doc.add(new StringField("id", "1", Field.Store.YES)); writer.addDocument(doc); // 1
doc = new Document(); addGroupField(doc, groupField, "author1"); doc.add(new StringField("author", "author1", Field.Store.YES)); doc.add(new TextField("content", "some more random text", Field.Store.YES)); doc.add(new StringField("id", "2", Field.Store.YES)); writer.addDocument(doc); // 2 doc = new Document(); addGroupField(doc, groupField, "author1"); doc.add(new StringField("author", "author1", Field.Store.YES)); doc.add(new TextField("content", "some more random textual data", Field.Store.YES)); doc.add(new StringField("id", "3", Field.Store.YES)); writer.addDocument(doc); // 3 doc = new Document(); addGroupField(doc, groupField, "author2"); doc.add(new StringField("author", "author2", Field.Store.YES)); doc.add(new TextField("content", "some random text", Field.Store.YES)); doc.add(new StringField("id", "4", Field.Store.YES)); writer.addDocument(doc); // 4 doc = new Document(); addGroupField(doc, groupField, "author3"); doc.add(new StringField("author", "author3", Field.Store.YES)); doc.add(new TextField("content", "some more random text", Field.Store.YES)); doc.add(new StringField("id", "5", Field.Store.YES)); writer.addDocument(doc); // 5 doc = new Document(); addGroupField(doc, groupField, "author3"); doc.add(new StringField("author", "author3", Field.Store.YES)); doc.add(new TextField("content", "random", Field.Store.YES)); doc.add(new StringField("id", "6", Field.Store.YES)); writer.addDocument(doc); // 6 -- no author field doc = new Document(); doc.add(new StringField("author", "author4", Field.Store.YES)); doc.add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES)); doc.add(new StringField("id", "6", Field.Store.YES)); writer.addDocument(doc); writer.commit(); writer.close(); } /** * 新增分組域 * * @param doc * 索引文件 * @param groupField * 需要分組的域名稱 * @param value * 域值 */ private static void addGroupField(Document doc, String groupField, String value) { //進行分組的域上建立的必須是SortedDocValuesField型別 doc.add(new 
SortedDocValuesField(groupField, new BytesRef(value))); }

3.對查詢結果進行分組,需要注意的坑與要點已在程式碼註釋中說明

 /**
     * Runs a grouped search on Lucene 7 and prints every group together with its hits.
     *
     * <p>The query is parsed against the {@code content} field, results are grouped by
     * {@code groupField}, and the index is read from {@code indexDir}. The analyzer, directory
     * and reader are closed when the search finishes, even if it fails.
     *
     * @throws Exception if the query cannot be parsed or the index cannot be opened or searched
     */
    @Test
    public void lucene7GroupBy() throws Exception{
        GroupingSearch groupingSearch = new GroupingSearch(groupField); // field to group by
        groupingSearch.setGroupSort(new Sort(SortField.FIELD_SCORE)); // how the groups are ordered
        groupingSearch.setFillSortFields(true); // fill in the sortValues of each SearchGroup
        groupingSearch.setCachingInMB(4.0, true); // cache up to 4 MB of hits, scores included
        groupingSearch.setAllGroups(true);
        // Caps the number of documents returned per group, not the number of groups.
        groupingSearch.setGroupDocsLimit(10);

        String queryExpression = "some content";
        try (Analyzer analyzer = new StandardAnalyzer();
             Directory directory = FSDirectory.open(Paths.get(indexDir));
             IndexReader reader = DirectoryReader.open(directory)) {
            QueryParser parser = new QueryParser("content", analyzer);
            Query query = parser.parse(queryExpression);
            IndexSearcher searcher = new IndexSearcher(reader);

            // Match documents whose content contains "some" or "content" (the parser's default
            // operator is OR) and group the hits by the group field. The group limit of 1000
            // is far more than the sample index needs.
            TopGroups<BytesRef> result = groupingSearch.search(searcher, query, 0, 1000);

            // Total number of matching documents
            System.out.println("總命中數:"+result.totalHitCount);
            // Number of groups returned
            System.out.println("分組數:"+result.groups.length);
            // Print the hits group by group
            for (GroupDocs<BytesRef> groupDocs : result.groups){
                if (groupDocs == null) {
                    continue;
                }
                if (groupDocs.groupValue != null) {
                    System.out.println("分組:" + groupDocs.groupValue.utf8ToString());
                }else{
                    // One indexed document has no SortedDocValues entry for the group field,
                    // so its group has a null groupValue.
                    System.out.println("分組:" + "unknow");
                }
                System.out.println("組內資料條數:" + groupDocs.totalHits);

                for(ScoreDoc scoreDoc : groupDocs.scoreDocs){
                    System.out.println("author:" + searcher.doc(scoreDoc.doc).get("author"));
                    System.out.println("content:" + searcher.doc(scoreDoc.doc).get("content"));
                    System.out.println();
                }

                System.out.println("=====================================");
            }
        }
    }

完整程式碼可以參考我的github:github