1. 程式人生 > >ES高階查詢,高亮顯示

ES高階查詢,高亮顯示

package xxx.xxx.xxx.xxx;

import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.Aggregation;
import org.elasticsearch.search.aggregations.metrics.avg.InternalAvg;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.AbstractAggregationBuilder;
import org.elasticsearch.search.aggregations.Aggregations;
import org.elasticsearch.search.highlight.HighlightField;
import org.elasticsearch.search.sort.SortOrder;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.Map;

/**
 * Basic examples of advanced Elasticsearch queries: match, prefix, range,
 * aggregation, highlighting and a term query against Chinese text.
 *
 * <p>Every operation goes through a {@link TransportClient}, which is created before
 * each test in {@link #setUp()} and closed afterwards in {@link #cleanUp()}.
 */
public class ElasticSearchQuery {
    /** Transport port used for every node. */
    private static final int PORT = 9300;
    /** Value sent as the {@code cluster.name} client setting. */
    private static final String CLUSTER_NAME = "bigdata";
    /** Host names of the nodes the client connects to. */
    private static final String[] NODES = {"hadoop01", "hadoop02", "hadoop03"};
    /** Indices searched by default. */
    private static final String[] INDICES = {"bigdata", "bank"};

    /** Client shared by the test methods of one test instance; {@code null} until {@link #setUp()} succeeds. */
    private TransportClient tc;

    /**
     * Creates the transport client and registers every node in {@code NODES}.
     *
     * @throws UnknownHostException if one of the node host names cannot be resolved
     */
    @Before
    public void setUp() throws UnknownHostException {
        // Resolve all hosts first so that no client is created when a host name is unknown.
        TransportAddress[] addresses = new TransportAddress[NODES.length];
        for (int i = 0; i < NODES.length; i++) {
            addresses[i] = new InetSocketTransportAddress(InetAddress.getByName(NODES[i]), PORT);
        }
        Settings setting = Settings.builder()
                .put("cluster.name", CLUSTER_NAME)
                // NOTE(review): "analyzer" is supplied as a client setting here; confirm it actually
                // influences analysis, which is normally configured on the index.
                .put("analyzer", "ik")
                .build();
        tc = TransportClient.builder().settings(setting).build();
        tc.addTransportAddresses(addresses);
    }

    /**
     * Match query: finds documents whose {@code author} field matches "Apache".
     */
    @Test
    public void testSearch() {
        SearchResponse response = tc.prepareSearch(INDICES)
                // Search type; the original notes list four options: QUERY_AND_FETCH,
                // QUERY_THEN_FETCH, DFS_QUERY_AND_FETCH and DFS_QUERY_THEN_FETCH.
                .setSearchType(SearchType.DEFAULT)
                // What to search for and in which field is expressed through QueryBuilders.
                .setQuery(QueryBuilders.matchQuery("author", "Apache"))
                .get();
        printHits(response);
    }

    /**
     * Prefix query: documents whose {@code name} starts with "h", sorted by {@code name}
     * descending, first page of at most 10 hits.
     */
    @Test
    public void testSearch_02() {
        SearchResponse response = tc.prepareSearch(INDICES)
                .setSearchType(SearchType.DEFAULT)
                .setQuery(QueryBuilders.prefixQuery("name", "h"))
                .addSort("name", SortOrder.DESC)
                .setFrom(0)
                .setSize(10)
                .get();
        printHits(response);
    }

    /**
     * Range query: documents with {@code age} between 35 and 40 (both inclusive), sorted by
     * {@code balance} descending, first page of at most 2 hits.
     */
    @Test
    public void testSearch_03() {
        SearchResponse response = tc.prepareSearch(INDICES)
                .setSearchType(SearchType.DEFAULT)
                .setQuery(QueryBuilders.rangeQuery("age").gte(35).lte(40))
                .addSort("balance", SortOrder.DESC)
                .setFrom(0)
                .setSize(2)
                .get();
        printHits(response);
    }

    /**
     * Aggregation: average {@code age} over the documents with {@code age} between 35 and 40
     * (both inclusive). Prints the total hit count and then every returned aggregation.
     */
    @Test
    public void testSearch_04() {
        SearchResponse response = tc.prepareSearch(INDICES)
                .setSearchType(SearchType.DEFAULT)
                .setQuery(QueryBuilders.rangeQuery("age").gte(35).lte(40))
                .addAggregation(AggregationBuilders.avg("avg_age").field("age"))
                .get();
        printTotal(response);
        Map<String, Aggregation> aggregationMap = response.getAggregations().asMap();
        for (Map.Entry<String, Aggregation> entry : aggregationMap.entrySet()) {
            // The generic Aggregation has to be cast to InternalAvg to read the average value.
            InternalAvg average = (InternalAvg) entry.getValue();
            System.out.println("key = " + entry.getKey());
            System.out.println("aggregation = " + average.getValue());
        }
    }

    /**
     * Highlighting: the matched keyword is wrapped in a prefix (pre tag) and a suffix
     * (post tag), e.g. {@code <font color='red'>keyword</font>}.
     *
     * <p>Steps:
     * <ol>
     *   <li>choose the highlighted field with {@code addHighlightedField};</li>
     *   <li>set the tags with {@code setHighlighterPreTags("<font color='red'>")} and
     *       {@code setHighlighterPostTags("</font>")};</li>
     *   <li>read the highlighted text from the fragments of
     *       {@code searchHit.getHighlightFields()}.</li>
     * </ol>
     */
    @Test
    public void testSearch_05() {
        SearchResponse response = tc.prepareSearch(INDICES)
                .setSearchType(SearchType.DEFAULT)
                .setQuery(QueryBuilders.matchQuery("name", "hadoop02"))
                .addHighlightedField("name")
                .setHighlighterPreTags("<font color='red'>")
                .setHighlighterPostTags("</font>")
                .get();
        for (SearchHit searchHit : printTotal(response)) {
            System.out.println(" ================== " );
            Map<String, HighlightField> highlightFields = searchHit.getHighlightFields();
            for (Map.Entry<String, HighlightField> entry : highlightFields.entrySet()) {
                System.out.println("key = " + entry.getKey());
                HighlightField highlightField = entry.getValue();
                System.out.println("name = " + highlightField.getName());
                // Join all fragments of this field into one string.
                StringBuilder highlighted = new StringBuilder();
                for (Text fragment : highlightField.getFragments()) {
                    highlighted.append(fragment.toString());
                }
                System.out.println("hightStr = " + highlighted);
            }
        }
    }

    /**
     * Term query against Chinese text in the {@code bigdata} index only.
     *
     * <p>Notes from the original author on tokenization:
     * {@code QueryBuilders.termQuery("author", "孫")} returns results while
     * {@code QueryBuilders.termQuery("author", "孫鵬")} does not. English text is split on
     * spaces, whereas Chinese text is by default split into single characters, so a
     * multi-character term such as 孫鵬 or 鵬飛 finds nothing with the default analysis.
     * Dedicated Chinese analyzers (for example Paoding or IK) exist to address this.
     *
     * <p>Also per the original notes: after switching to an analyzer such as IK, data that
     * is already in the index is not re-analyzed; only newly added data is analyzed with it.
     */
    @Test
    public void testSearch_Chinese() {
        // Only the "bigdata" index is searched; it is passed directly instead of
        // overwriting the shared default index list.
        SearchResponse response = tc.prepareSearch("bigdata")
                // Original note, sample values: 奧利貓 (before ik), 法拉狗
                .setSearchType(SearchType.DEFAULT)
                .setQuery(QueryBuilders.termQuery("author", "奧利"))
                .get();
        printHits(response);
    }

    /**
     * Prints the total hit count of a response.
     *
     * @param response the search response to report on
     * @return the hits of {@code response}, so callers can iterate them
     */
    private SearchHits printTotal(SearchResponse response) {
        SearchHits hits = response.getHits();
        long totalHits = hits.getTotalHits();
        System.out.println("老王為您找到相關結果約 " + totalHits);
        return hits;
    }

    /**
     * Prints the total hit count followed by the details of every returned hit.
     *
     * @param response the search response to print
     */
    private void printHits(SearchResponse response) {
        for (SearchHit searchHit : printTotal(response)) {
            printSearchHit(searchHit);
        }
    }

    /**
     * Prints index, type, id, version, score and source of one hit, one value per line.
     *
     * @param searchHit the hit to print
     */
    private void printSearchHit(SearchHit searchHit) {
        String index = searchHit.getIndex();
        String type = searchHit.getType();
        String id = searchHit.getId();
        long version = searchHit.getVersion();
        float score = searchHit.getScore();
        Map<String, Object> source = searchHit.getSource();
        System.out.println("index "+index);
        System.out.println("type "+type);
        System.out.println("id "+id);
        System.out.println("version "+version);
        System.out.println("score "+score);
        System.out.println("source "+source);
    }

    /**
     * Closes the transport client if {@link #setUp()} managed to create one.
     */
    @After
    public void cleanUp() {
        if (tc != null) {
            tc.close();
        }
    }
}