ES advanced queries and result highlighting
阿新 • Published: 2019-01-28
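This post walks through ElasticSearch's query API from Java: match and prefix queries, paging and sorting, range queries, an avg aggregation, result highlighting, and the effect of Chinese tokenization. The code is a JUnit test class written against the 2.x TransportClient API (TransportClient.builder() and InetSocketTransportAddress) and expects a live cluster.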
package xxx.xxx.xxx.xxx;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.Aggregation;
import org.elasticsearch.search.aggregations.metrics.avg.InternalAvg;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.Aggregations;
import org.elasticsearch.search.highlight.HighlightField;
import org.elasticsearch.search.sort.SortOrder;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.Map;
/**
 * Basic operations for ElasticSearch advanced queries.
 *
 * The entry point for every operation is the TransportClient.
 */
public class ElasticSearchQuery {

    private TransportClient tc;
    private int port = 9300;
    private String clusterName = "bigdata";
    private String node_1 = "hadoop01";
    private String node_2 = "hadoop02";
    private String node_3 = "hadoop03";
    private String[] indices = {"bigdata", "bank"};
    @Before
    public void setUp() throws UnknownHostException {
        // cluster.name must match the name of the target cluster
        // note: analyzers are normally configured per index, not on the client
        Settings setting = Settings.builder()
                .put("cluster.name", clusterName)
                .put("analyzer", "ik")
                .build();
        tc = TransportClient.builder().settings(setting).build();
        // register every data node so the client can round-robin between them
        TransportAddress hadoop = new InetSocketTransportAddress(InetAddress.getByName(node_1), port);
        TransportAddress spark = new InetSocketTransportAddress(InetAddress.getByName(node_2), port);
        TransportAddress storm = new InetSocketTransportAddress(InetAddress.getByName(node_3), port);
        tc.addTransportAddresses(hadoop, spark, storm);
    }
    /**
     * Query for documents whose author field contains "Apache".
     */
    @Test
    public void testSearch(){
        SearchResponse response = tc.prepareSearch(indices)
                // search type: one of QUERY_AND_FETCH, QUERY_THEN_FETCH,
                // DFS_QUERY_AND_FETCH, DFS_QUERY_THEN_FETCH
                .setSearchType(SearchType.DEFAULT)
                // what to search for, and in which field, is set via QueryBuilders
                .setQuery(QueryBuilders.matchQuery("author", "Apache"))
                .get();
        SearchHits hits = response.getHits();
        long totalHits = hits.getTotalHits();
        System.out.println("Found about " + totalHits + " results");
        for(SearchHit searchHit : hits){
            printSearchHit(searchHit);
        }
    }
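    /**
     * QueryBuilders can also compose conditions. A minimal sketch using
     * boolQuery with must/mustNot clauses (hedged: the "state" field is
     * assumed from the standard bank sample data and does not appear
     * elsewhere in this post):
     */
    @Test
    public void testSearch_bool(){
        SearchResponse response = tc.prepareSearch(indices)
                .setSearchType(SearchType.DEFAULT)
                .setQuery(QueryBuilders.boolQuery()
                        // both clauses must hold for a document to match
                        .must(QueryBuilders.rangeQuery("age").gte(35))
                        .mustNot(QueryBuilders.termQuery("state", "PA")))
                .get();
        System.out.println("Found about " + response.getHits().getTotalHits() + " results");
    }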
    /**
     * Prefix query: documents whose name starts with "h",
     * sorted descending by name and paged with from/size.
     */
    @Test
    public void testSearch_02(){
        SearchResponse searchResponse = tc.prepareSearch(indices)
                .setSearchType(SearchType.DEFAULT)
                .setQuery(QueryBuilders.prefixQuery("name", "h"))
                .addSort("name", SortOrder.DESC)
                .setFrom(0)
                .setSize(10)
                .get();
        SearchHits hits = searchResponse.getHits();
        long totalHits = hits.getTotalHits();
        System.out.println("Found about " + totalHits + " results");
        for(SearchHit searchHit : hits){
            printSearchHit(searchHit);
        }
    }
    /**
     * Range query: members aged between 35 and 40,
     * sorted descending by balance.
     */
    @Test
    public void testSearch_03(){
        SearchResponse searchResponse = tc.prepareSearch(indices)
                .setSearchType(SearchType.DEFAULT)
                .setQuery(QueryBuilders.rangeQuery("age").gte(35).lte(40))
                .addSort("balance", SortOrder.DESC)
                .setFrom(0)
                .setSize(2)
                .get();
        SearchHits hits = searchResponse.getHits();
        long totalHits = hits.getTotalHits();
        System.out.println("Found about " + totalHits + " results");
        for(SearchHit searchHit : hits){
            printSearchHit(searchHit);
        }
    }
    /**
     * Aggregation: the average age of members aged between 35 and 40.
     */
    @Test
    public void testSearch_04(){
        SearchResponse searchResponse = tc.prepareSearch(indices)
                .setSearchType(SearchType.DEFAULT)
                .setQuery(QueryBuilders.rangeQuery("age").gte(35).lte(40))
                .addAggregation(AggregationBuilders.avg("avg_age").field("age"))
                .get();
        SearchHits hits = searchResponse.getHits();
        long totalHits = hits.getTotalHits();
        System.out.println("Found about " + totalHits + " results");
        Aggregations aggregations = searchResponse.getAggregations();
        Map<String, Aggregation> aggregationMap = aggregations.asMap();
        for(Map.Entry<String, Aggregation> entry : aggregationMap.entrySet()){
            String key = entry.getKey();
            // cast the Aggregation down to InternalAvg to read the computed average
            InternalAvg aggregation = (InternalAvg) entry.getValue();
            System.out.println("key = " + key);
            System.out.println("aggregation = " + aggregation.getValue());
        }
    }
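    /**
     * A minimal alternative sketch: instead of casting to the concrete
     * InternalAvg, Aggregations.get(name) resolves the aggregation by the
     * name it was registered under, typed as the Avg interface from the
     * same metrics.avg package (fully qualified here to keep the imports
     * above untouched).
     */
    @Test
    public void testSearch_04_typed(){
        SearchResponse searchResponse = tc.prepareSearch(indices)
                .setSearchType(SearchType.DEFAULT)
                .setQuery(QueryBuilders.rangeQuery("age").gte(35).lte(40))
                .addAggregation(AggregationBuilders.avg("avg_age").field("age"))
                .get();
        // look the aggregation up under the name given to AggregationBuilders.avg(...)
        org.elasticsearch.search.aggregations.metrics.avg.Avg avgAge =
                searchResponse.getAggregations().get("avg_age");
        System.out.println("avg_age = " + avgAge.getValue());
    }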
    /**
     * Highlighting (HighLight):
     * <font color='red'>keyword</font>
     * Highlighting simply wraps the matched keyword with a prefix (pre)
     * and a suffix (post) tag.
     *
     * Things to set up:
     * 1. the field to highlight: addHighlightedField
     * 2. the prefix and suffix tags:
     *    setHighlighterPreTags("<font color='red'>")
     *    setHighlighterPostTags("</font>")
     * 3. reading the highlighted data:
     *    from the fragments in searchHit.getHighlightFields()
     */
    @Test
    public void testSearch_05(){
        SearchResponse response = tc.prepareSearch(indices)
                .setSearchType(SearchType.DEFAULT)
                .setQuery(QueryBuilders.matchQuery("name", "hadoop02"))
                .addHighlightedField("name")
                .setHighlighterPreTags("<font color='red'>")
                .setHighlighterPostTags("</font>")
                .get();
        SearchHits searchHits = response.getHits();
        long totalHits = searchHits.getTotalHits();
        System.out.println("Found about " + totalHits + " results");
        for(SearchHit searchHit : searchHits){
            System.out.println(" ================== ");
            Map<String, HighlightField> highlightFields = searchHit.getHighlightFields();
            for(Map.Entry<String, HighlightField> me : highlightFields.entrySet()){
                String key = me.getKey();
                System.out.println("key = " + key);
                HighlightField highlightField = me.getValue();
                String name = highlightField.getName();
                System.out.println("name = " + name);
                // join the highlighted fragments into one display string
                Text[] fragments = highlightField.getFragments();
                StringBuilder highlightStr = new StringBuilder();
                for(Text text : fragments){
                    highlightStr.append(text.toString());
                }
                System.out.println("highlightStr = " + highlightStr);
            }
        }
    }
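    /**
     * Only hits that actually matched on the highlighted field carry
     * fragments; for anything else the display value has to come from the
     * original _source. A minimal sketch of that fallback, reusing the same
     * query as above:
     */
    @Test
    public void testSearch_05_fallback(){
        SearchResponse response = tc.prepareSearch(indices)
                .setSearchType(SearchType.DEFAULT)
                .setQuery(QueryBuilders.matchQuery("name", "hadoop02"))
                .addHighlightedField("name")
                .setHighlighterPreTags("<font color='red'>")
                .setHighlighterPostTags("</font>")
                .get();
        for(SearchHit searchHit : response.getHits()){
            HighlightField field = searchHit.getHighlightFields().get("name");
            String display;
            if(field != null){
                // join the highlighted fragments
                StringBuilder sb = new StringBuilder();
                for(Text text : field.getFragments()){
                    sb.append(text.toString());
                }
                display = sb.toString();
            } else {
                // no highlight for this hit: fall back to the raw source value
                display = String.valueOf(searchHit.getSource().get("name"));
            }
            System.out.println("display = " + display);
        }
    }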
    /**
     * Querying Chinese text.
     *
     * Tokenization:
     * with termQuery, QueryBuilders.termQuery("author", "孫") returns results,
     * while QueryBuilders.termQuery("author", "孫鵬") returns none.
     * The cause is Chinese tokenization: English is trivially split on spaces,
     * whereas Chinese is split character by character by default.
     *
     * To match phrases such as 孫鵬 or 鵬飛, the default tokenizer is not
     * enough, which is why many Chinese analyzers were developed over time
     * (e.g. Paoding, IK).
     *
     * Note: after switching to IK (or another analyzer), documents already in
     * the index are NOT re-analyzed; only newly indexed documents are!!!
     */
    @Test
    public void testSearch_Chinese(){
        indices = new String[]{"bigdata"};
        SearchResponse response = tc.prepareSearch(indices)
                .setSearchType(SearchType.DEFAULT) // sample docs: 奧利貓 (indexed before IK), 法拉狗
                .setQuery(QueryBuilders.termQuery("author", "奧利"))
                .get();
        SearchHits hits = response.getHits();
        long totalHits = hits.getTotalHits();
        System.out.println("Found about " + totalHits + " results");
        for(SearchHit searchHit : hits){
            printSearchHit(searchHit);
        }
    }
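    /**
     * A sketch of the term-vs-match distinction described above: termQuery is
     * not analyzed (it must equal an indexed token exactly), while matchQuery
     * runs the query text through the field's analyzer first, so with IK
     * installed a phrase like 孫鵬 can match as a whole word. Same index and
     * field as the test above; results depend on the analyzer configured there.
     */
    @Test
    public void testSearch_Chinese_match(){
        indices = new String[]{"bigdata"};
        SearchResponse response = tc.prepareSearch(indices)
                .setSearchType(SearchType.DEFAULT)
                // analyzed query: tokenized before matching
                .setQuery(QueryBuilders.matchQuery("author", "孫鵬"))
                .get();
        System.out.println("Found about " + response.getHits().getTotalHits() + " results");
        for(SearchHit searchHit : response.getHits()){
            printSearchHit(searchHit);
        }
    }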
    /**
     * Print the core metadata and the _source of a single hit.
     */
    private void printSearchHit(SearchHit searchHit){
        String index = searchHit.getIndex();
        String type = searchHit.getType();
        String id = searchHit.getId();
        long version = searchHit.getVersion();
        float score = searchHit.getScore();
        Map<String, Object> source = searchHit.getSource();
        System.out.println("index " + index);
        System.out.println("type " + type);
        System.out.println("id " + id);
        System.out.println("version " + version);
        System.out.println("score " + score);
        System.out.println("source " + source);
    }
    @After
    public void cleanUp(){
        // release the client's transport resources after each test
        if(tc != null){
            tc.close();
        }
    }
}