ES標籤搜尋並解決評分排序問題
阿新 • • 發佈:2020-12-30
一、概述
需求:
最近在做一個新聞專案,有這樣一個需求:
- 使用者根據視訊內容手動建立標籤,標籤個數不限
- 在視訊詳情頁提供根據標籤推薦視訊功能,即按本視訊的標籤進行搜尋,標籤匹配多的排在前面,匹配少的排在後面
嘗試過但不可行的方案:
- 欄位為keyword型別,資料以陣列儲存,未找到可實現此功能的檢索方式
- 欄位為text型別,多個標籤以空格隔開或者陣列儲存,使用match搜尋,資料評分不準確
- 欄位為text型別,多個標籤以空格隔開或者陣列儲存,使用match結合match_phrase搜尋,資料評分仍不準確
可行方案:
1. 欄位為text型別,指定分詞器為whitespace,以空格分隔標籤
"mediaTag" : { "type" : "text", "analyzer": "whitespace" }
2. 欄位為text型別,指定分詞器為pattern,指定標籤分隔字元,以逗號分隔
PUT /es_medias_test2
{
  "settings": {
    "analysis": {
      "analyzer": {
        "comma": { // 自定義分詞器名稱
          "type": "pattern",
          "pattern": ","
        }
      }
    }
  },
  "mappings": {
    "esmedias": {
      "properties": {
        "mediaTag": {
          "type": "text",
          "analyzer": "comma"
        }
      }
    }
  }
}
二、可行方案測試(以可行方案一為例)
1. 建立索引
PUT /es_medias_test2
{
  "settings": {
    "index": {
      "number_of_shards": "1",
      "number_of_replicas": "0"
    }
  },
  "mappings": {
    "esmedias": {
      "properties": {
        "mediaTag": {
          "type": "text",
          "analyzer": "whitespace"
        }
      }
    }
  }
}
2. 新增資料
POST /es_medias_test2/_bulk
{"create":{"_index":"es_medias_test2","_type":"esmedias","_id":"o3kyp3YB_f4AQBwwbA7Q"}}
{"mediaTag":"美國 英國"}
{"create":{"_index":"es_medias_test2","_type":"esmedias","_id":"lHk0p3YB_f4AQBwwvxBz"}}
{"mediaTag":"英國 美國"}
{"create":{"_index":"es_medias_test2","_type":"esmedias","_id":"-Xk1p3YB_f4AQBwwNRBt"}}
{"mediaTag":"美國 法國 英國"}
{"create":{"_index":"es_medias_test2","_type":"esmedias","_id":"AXlYp3YB_f4AQBww9zDT"}}
{"mediaTag":"china 美國 英國"}
{"create":{"_index":"es_medias_test2","_type":"esmedias","_id":"13k1p3YB_f4AQBwwBxDw"}}
{"mediaTag":"美國 英國 士大夫"}
{"create":{"_index":"es_medias_test2","_type":"esmedias","_id":"PXk1p3YB_f4AQBwwfxGI"}}
{"mediaTag":"美國"}
{"create":{"_index":"es_medias_test2","_type":"esmedias","_id":"G3k1p3YB_f4AQBwwahEM"}}
{"mediaTag":"英國 船"}
{"create":{"_index":"es_medias_test2","_type":"esmedias","_id":"G3lap3YB_f4AQBwwNTEX"}}
{"mediaTag":"china 美國"}
{"create":{"_index":"es_medias_test2","_type":"esmedias","_id":"FXlLp3YB_f4AQBwwUCRf"}}
{"mediaTag":"china 美國 法國"}
3. 測試
GET /es_medias_test2/_search { "query": { "match": { "mediaTag": "美國 英國" } } }
返回結果符合預期
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 10,
"max_score" : 1.8475795,
"hits" : [
{
"_index" : "es_medias_test2",
"_type" : "esmedias",
"_id" : "-Xk1p3YB_f4AQBwwNRBt",
"_score" : 1.8475795,
"_source" : {
"mediaTag" : "美國 法國 英國"
}
},
{
"_index" : "es_medias_test2",
"_type" : "esmedias",
"_id" : "FXlLp3YB_f4AQBwwUCRf",
"_score" : 1.5141833,
"_source" : {
"mediaTag" : "china 美國 法國"
}
},
{
"_index" : "es_medias_test2",
"_type" : "esmedias",
"_id" : "o3kyp3YB_f4AQBwwbA7Q",
"_score" : 0.66557413,
"_source" : {
"mediaTag" : "美國 英國"
}
},
{
"_index" : "es_medias_test2",
"_type" : "esmedias",
"_id" : "xXkyp3YB_f4AQBwwpw6Y",
"_score" : 0.66557413,
"_source" : {
"mediaTag" : "美國 英國"
}
},
{
"_index" : "es_medias_test2",
"_type" : "esmedias",
"_id" : "lHk0p3YB_f4AQBwwvxBz",
"_score" : 0.66557413,
"_source" : {
"mediaTag" : "英國 美國"
}
},
{
"_index" : "es_medias_test2",
"_type" : "esmedias",
"_id" : "13k1p3YB_f4AQBwwBxDw",
"_score" : 0.5578373,
"_source" : {
"mediaTag" : "美國 英國 士大夫"
}
},
{
"_index" : "es_medias_test2",
"_type" : "esmedias",
"_id" : "AXlYp3YB_f4AQBww9zDT",
"_score" : 0.39778596,
"_source" : {
"mediaTag" : "china,美國 英國"
}
},
{
"_index" : "es_medias_test2",
"_type" : "esmedias",
"_id" : "G3k1p3YB_f4AQBwwahEM",
"_score" : 0.39778596,
"_source" : {
"mediaTag" : "英國 船"
}
},
{
"_index" : "es_medias_test2",
"_type" : "esmedias",
"_id" : "PXk1p3YB_f4AQBwwfxGI",
"_score" : 0.33188638,
"_source" : {
"mediaTag" : "美國"
}
},
{
"_index" : "es_medias_test2",
"_type" : "esmedias",
"_id" : "G3lap3YB_f4AQBwwNTEX",
"_score" : 0.26778817,
"_source" : {
"mediaTag" : "china 美國"
}
}
]
}
}