1. 程式人生 > >Elasticsearch原始碼分析十三--高亮顯示highlight

Elasticsearch原始碼分析十三--高亮顯示highlight

  • 簡介
  • 查詢語法
  • 原始碼分析

簡介

高亮顯示是在結果文件中顯示查詢中的哪個或哪些單詞被匹配的過程。
Elasticsearch底層使用Apache Lucene。 Lucene提供了三種類型的高亮實現:
第一種是標準型別(本文例子);第二種叫FastVectorHighlighter,
它需要詞向量和位置才能工作;第三種叫PostingsHighlighter。
Elasticsearch自動選擇正確的高亮實現方式:如果欄位的配置中,
term_vector屬性設成了with_positions_offsets,則將使用FastVectorHighlighter。
使用詞向量將導致索引變大,但高亮顯示的執行需要更少的時間。此外,
對於儲存了大量資料的欄位來說,推薦使用FastVectorHighlighter。

查詢語法

例如:高亮顯示在title欄位中匹配的單詞,注意highlight部分和query部分位於JSON中的同一層,
也可以看做第一層。在Elasticsearch程式碼中,位於JSON第一層的query、highlight等叫做Element。

 {
    "query" : {
       "term" : {
            "title" : "crime"
        }
    },
    "highlight" : {
        "pre_tags" : [ "<b>" ],
        "post_tags" : [ "</b>" ],
        "fields" : {
            "title" : {}
        }
    }
}

該查詢的結果如下,結果中除標準返回資訊外,還有一個highlight部分,
該部分使用<b>這個HTML標籤來包含高亮部分,高亮標籤由pre_tags和post_tags屬性指定,
預設使用<em>標籤。

{
    "took" : 2,
    "timed_out" : false,
    "_shards" : {
        "total" : 5,
        "successful" : 5,
        "failed" : 0
    },
    "hits" : {
        "total" : 1,
        "max_score" : 0.19178301,
        "hits" : [ {
            "_index" : "library",
            "_type" : "book",
            "_id" : "4",
            "_score" : 0.19178301,
            "_source" : {
                "title": "Crime and Punishment",
                "characters": ["Raskolnikov"],
                "tags": [],
                "copies": 0, "available" : true
            },
            "highlight" : {
                "title" : [ "<b>Crime</b> and Punishment" ]
            }
        } ]
    }
}

原始碼分析

'''(1)Elasticsearch code:在SearchService中註冊dfsPhase、queryPhase、fetchPhase各階段元素的解析方法'''
/**
 * Excerpt of the search service; the article elides parts of the class and of the constructor.
 * The visible constructor code merges the parse elements contributed by the dfs, query and
 * fetch phases into a single immutable lookup table.
 */
public class SearchService extends AbstractLifecycleComponent<SearchService> {

    // Lookup table from top-level request element name (e.g. "stats") to its parser.
    private final ImmutableMap<String, SearchParseElement> elementParsers;

    /**
     * Builds the element-parser table and registers the listeners shown below.
     * Several parameters are not used in the visible part of the body; the code that
     * consumes them is elided by the article.
     */
    public SearchService(Settings settings, ClusterService clusterService, IndicesService indicesService, IndicesLifecycle indicesLifecycle, IndicesWarmer indicesWarmer, ThreadPool threadPool,
                         ScriptService scriptService, CacheRecycler cacheRecycler, DfsPhase dfsPhase, QueryPhase queryPhase, FetchPhase fetchPhase) {
        super(settings);
        this.threadPool = threadPool;
        // Article marker: code omitted here.
        '''省略....'''
        // Article marker: all element parsers are registered at this point.
        '''在此註冊所有元素的解析方法'''
        // Collect into a mutable map first; each phase contributes its own name -> parser entries.
        Map<String, SearchParseElement> elementParsers = new HashMap<String, SearchParseElement>();
        elementParsers.putAll(dfsPhase.parseElements());
        elementParsers.putAll(queryPhase.parseElements());
        elementParsers.putAll(fetchPhase.parseElements());
        // "stats" is registered directly here rather than coming from one of the phases.
        elementParsers.put("stats", new StatsGroupsParseElement());
        // Freeze the table: the field holds an immutable copy of the local map.
        this.elementParsers = ImmutableMap.copyOf(elementParsers);
        indicesLifecycle.addListener(indicesLifecycleListener);

        // Schedules a Reaper with keepAliveInterval as the fixed delay.
        // NOTE(review): Reaper is not shown in this excerpt; presumably it cleans up expired search contexts - confirm.
        this.keepAliveReaper = threadPool.scheduleWithFixedDelay(new Reaper(), keepAliveInterval);

        // NOTE(review): the assignment of this.indicesWarmer is not visible here; presumably it is in the elided code.
        this.indicesWarmer.addListener(new SearchWarmer());
    }

}
'''(2)Elasticsearch code:FetchPhase彙總各FetchSubPhase(包含HighlightPhase)的元素解析方法'''
/**
 * Excerpt of the fetch phase. It holds a fixed set of fetch sub-phases and exposes the
 * union of their parse elements, plus its own "fields" element.
 */
public class FetchPhase implements SearchPhase {

    // Sub-phases in the order assigned by the constructor.
    private final FetchSubPhase[] fetchSubPhases;

    @Inject
    // Article marker: HighlightPhase is the sub-phase responsible for highlighting.
    '''HighlightPhase高亮顯示'''
    public FetchPhase(HighlightPhase highlightPhase, ScriptFieldsFetchSubPhase scriptFieldsPhase, PartialFieldsFetchSubPhase partialFieldsPhase,
                      MatchedFiltersFetchSubPhase matchFiltersPhase, ExplainFetchSubPhase explainPhase, VersionFetchSubPhase versionPhase) {
        // Array order differs from parameter order: highlightPhase is stored fifth, before versionPhase.
        this.fetchSubPhases = new FetchSubPhase[]{scriptFieldsPhase, partialFieldsPhase, matchFiltersPhase, explainPhase, highlightPhase, versionPhase};
    }

    /**
     * Returns the element parsers contributed by this phase: the "fields" parser plus
     * every entry returned by each sub-phase's own parseElements().
     */
    @Override
    public Map<String, ? extends SearchParseElement> parseElements() {
        ImmutableMap.Builder<String, SearchParseElement> parseElements = ImmutableMap.builder();
        parseElements.put("fields", new FieldsParseElement());
        // Each sub-phase (HighlightPhase among them) adds its own name -> parser entries.
        for (FetchSubPhase fetchSubPhase : fetchSubPhases) {
            parseElements.putAll(fetchSubPhase.parseElements());
        }
        return parseElements.build();
    }
}
'''(3)Elasticsearch code:在HighlightPhase中註冊highlight的解析例項HighlighterParseElement'''
/**
 * Excerpt of the highlighting fetch sub-phase; only the parser registration is shown.
 */
public class HighlightPhase extends AbstractComponent implements FetchSubPhase {

    /**
     * Registers a single element parser: the "highlight" request element is handled
     * by a new HighlighterParseElement instance.
     */
    @Override
    public Map<String, ? extends SearchParseElement> parseElements() {
        return ImmutableMap.of("highlight", new HighlighterParseElement());
    }
} 

'''(4)Elasticsearch code:HighlighterParseElement解析highlight元素的具體實現'''
/**
 * <pre>
 * highlight : {
 *  tags_schema : "styled",
 *  pre_tags : ["tag1", "tag2"],
 *  post_tags : ["tag1", "tag2"],
 *  order : "score",
 *  highlight_filter : true,
 *  fields : {
 *      field1 : {  },
 *      field2 : { fragment_size : 100, number_of_fragments : 2 },
 *      field3 : { number_of_fragments : 5, order : "simple", tags_schema : "styled" },
 *      field4 : { number_of_fragments: 0, pre_tags : ["openingTagA", "openingTagB"], post_tags : ["closingTag"] }
 *  }
 * }
 * </pre>
 */
public class HighlighterParseElement implements SearchParseElement {

        '''預設高亮顯示的HTML標籤'''
    private static final String[] DEFAULT_PRE_TAGS = new String[]{"<em>"};
    private static final String[] DEFAULT_POST_TAGS = new String[]{"</em>"};

    private static final String[] STYLED_PRE_TAG = {
            "<em class=\"hlt1\">", "<em class=\"hlt2\">", "<em class=\"hlt3\">",
            "<em class=\"hlt4\">", "<em class=\"hlt5\">", "<em class=\"hlt6\">",
            "<em class=\"hlt7\">", "<em class=\"hlt8\">", "<em class=\"hlt9\">",
            "<em class=\"hlt10\">"
    };
    private static final String[] STYLED_POST_TAGS = {"</em>"};

    @Override
    public void parse(XContentParser parser, SearchContext context) throws Exception {
        XContentParser.Token token;
        String topLevelFieldName = null;
        List<SearchContextHighlight.Field> fields = newArrayList();

        String[] globalPreTags = DEFAULT_PRE_TAGS;
        String[] globalPostTags = DEFAULT_POST_TAGS;
        ......
        String globalHighlighterType = null;
        String globalFragmenter = null;
        Map<String, Object> globalOptions = null;

        '''此處的parser是JsonXContentParser例項'''
        while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
            if (token == XContentParser.Token.FIELD_NAME) {
                topLevelFieldName = parser.currentName();
            } else if (token == XContentParser.Token.START_ARRAY) {
                if ("pre_tags".equals(topLevelFieldName) || "preTags".equals(topLevelFieldName)) {
                    List<String> preTagsList = Lists.newArrayList();
                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
                        preTagsList.add(parser.text());
                    }
                    globalPreTags = preTagsList.toArray(new String[preTagsList.size()]);
                } else if ("post_tags".equals(topLevelFieldName) || "postTags".equals(topLevelFieldName)) {
                    List<String> postTagsList = Lists.newArrayList();
                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
                        postTagsList.add(parser.text());
                    }
                    globalPostTags = postTagsList.toArray(new String[postTagsList.size()]);
                }
            } else if (token.isValue()) {
                if ("order".equals(topLevelFieldName)) {
                    globalScoreOrdered = "score".equals(parser.text());
                } else if ("tags_schema".equals(topLevelFieldName) || "tagsSchema".equals(topLevelFieldName)) {
                    String schema = parser.text();
                    if ("styled".equals(schema)) {
                        globalPreTags = STYLED_PRE_TAG;
                        globalPostTags = STYLED_POST_TAGS;
                    }
          '''省略.....'''