elasticsearch支援大table格式資料的搜尋
阿新 • • 發佈:2021-09-08
一、問題源起
資料情況
TableMeta, 儲存table的元資料,通過fileId關聯具體的GridFS檔案;
id | name | creator | fileId |
---|---|---|---|
1 | table1 | mango | f1 |
2 | table2 | mango | f2 |
table內包含列名和具體的行資料;
不同型別的table,列的名字和數量都可能不同;
from fport to toport location
192.168.1.1 11 192.168.1.12 11 chaoyang
192.168.1.2 22 192.168.1.13 22 tongzhou
搜尋要求
支援所有型別的table的搜尋;
支援全欄位的搜尋;
只返回表內命中的行,並進行高亮;
二、開發環境
elasticsearch 6.8.12
java 12.0.2 2019-07-16
Java(TM) SE Runtime Environment (build 12.0.2+10)
Java HotSpot(TM) 64-Bit Server VM (build 12.0.2+10, mixed mode, sharing)
三、elastic search對array的支援情況
扁平化陣列元素
預設情況下elastic search會將陣列內部物件的欄位進行扁平化處理,這樣就會丟失掉元素的獨立性。
直接index一個文件
PUT my_array_index/_doc/1 { "group" : "fans", "user" : [ { "first" : "John", "last" : "Smith" }, { "first" : "Alice", "last" : "White" } ] } { "_index":"my_array_index", "_type":"_doc", "_id":"1", "_version":1, "result":"created", "_shards":{ "total":2, "successful":1, "failed":0 }, "_seq_no":0, "_primary_term":1 }
elastic search 內部會將文件轉化為如下形式再進行索引
{
"group" : "fans",
"user.first" : [ "alice", "john" ],
"user.last" : [ "smith", "white" ]
}
扁平化處理將所有陣列元素物件的相同欄位值合併到一起作為一個數組,這樣就丟失了user.first和user.last之間的對應關係,類似下邊的查詢即使沒有Alice Smith這個人也可以命中
GET my_index/_search { "query": { "bool": { "must": [ { "match": { "user.first": "Alice" }}, { "match": { "user.last": "Smith" }} ] } } } { "took":2, "timed_out":false, "_shards":{ "total":5, "successful":5, "skipped":0, "failed":0 }, "hits":{ "total":1, "max_score":0.5753642, "hits":[ { "_index":"my_array_index", "_type":"_doc", "_id":"1", "_score":0.5753642, "_source":{ "group":"fans", "user":[ { "first":"John", "last":"Smith" }, { "first":"Alice", "last":"White" } ] } } ] } }
使用nested資料型別文件化陣列元素
elastic search內部提供了nested資料型別,可以將陣列元素作為單獨的隱藏的內部文件進行索引,從而保持文件之間的獨立性;
將欄位對映為nested型別
PUT my_nested_index
{
"mappings": {
"_doc": {
"properties": {
"user": {
"type": "nested"
}
}
}
}
}
{
"acknowledged":true,
"shards_acknowledged":true,
"index":"my_nested_index"
}
index文件
PUT my_nested_index/_doc/1
{
"group" : "fans",
"user" : [
{
"first" : "John",
"last" : "Smith"
},
{
"first" : "Alice",
"last" : "White"
}
]
}
{
"_index":"my_nested_index",
"_type":"_doc",
"_id":"1",
"_version":1,
"result":"created",
"_shards":{
"total":2,
"successful":1,
"failed":0
},
"_seq_no":0,
"_primary_term":1
}
elastic search提供了單獨的nested query 來支援nested型別
GET my_nested_index/_search
{
"query": {
"nested": {
"path": "user",
"query": {
"bool": {
"must": [
{ "match": { "user.first": "Alice" }},
{ "match": { "user.last": "Smith" }}
]
}
}
}
}
}
{
"took":3,
"timed_out":false,
"_shards":{
"total":5,
"successful":5,
"skipped":0,
"failed":0
},
"hits":{
"total":0,
"max_score":null,
"hits":[
]
}
}
nested query提供了inner_hits類支援欄位高亮,從高亮資訊中可以看到,offset欄位指出了命中了陣列中的第幾個元素;
GET my_nested_index/_search
{
"query": {
"nested": {
"path": "user",
"query": {
"bool": {
"should": [
{ "match": { "user.first": "Alice" }},
{ "match": { "user.last": "smith" }}
]
}
},
"inner_hits": {
"highlight": {
"fields": {
"*": {}
}
}
}
}
}
}
{
"took":8,
"timed_out":false,
"_shards":{
"total":5,
"successful":5,
"skipped":0,
"failed":0
},
"hits":{
"total":1,
"max_score":0.6931472,
"hits":[
{
"_index":"my_nested_index",
"_type":"_doc",
"_id":"1",
"_score":0.6931472,
"_source":{
"group":"fans",
"user":[
{
"first":"John",
"last":"Smith"
},
{
"first":"Alice",
"last":"White"
}
]
},
"inner_hits":{
"user":{
"hits":{
"total":2,
"max_score":0.6931472,
"hits":[
{
"_index":"my_nested_index",
"_type":"_doc",
"_id":"1",
"_nested":{
"field":"user",
"offset":0
},
"_score":0.6931472,
"_source":{
"first":"John",
"last":"Smith"
},
"highlight":{
"user.last":[
"<em>Smith</em>"
]
}
},
{
"_index":"my_nested_index",
"_type":"_doc",
"_id":"1",
"_nested":{
"field":"user",
"offset":1
},
"_score":0.6931472,
"_source":{
"first":"Alice",
"last":"White"
},
"highlight":{
"user.first":[
"<em>Alice</em>"
]
}
}
]
}
}
}
}
]
}
}
總結
經過以上的研究可以看到,elastic search提供的nested資料型別基本滿足我們的目標要求,接下來使用具體的table資料做進一步的研究;
四、使用nested資料型別索引Table資料
elastic search索引資料結構
欄位名字 | 欄位型別 | 描述 |
---|---|---|
id | string | 主鍵 |
name | string | table的名字 |
creator | string | 建立者 |
content | (object) array | 行資料陣列 |
elastic search mapping
PUT tables
{
"mappings": {
"_doc": {
"properties": {
"id": {
"type": "keyword"
},
"name": {
"type": "keyword"
},
"creator": {
"type": "keyword"
},
"content": {
"type": "nested"
}
}
}
}
}
{
"acknowledged": true,
"shards_acknowledged": true,
"index": "tables"
}
index 一個Table data
PUT tables/_doc/1
{
"id":"1",
"name":"table1",
"creator":"mango",
"content":[
{
"0":"192.168.1.1",
"1":"11",
"2":"192.168.1.12",
"3":"11",
"4":"chaoyang"
},
{
"0":"192.168.1.2",
"1":"22",
"2":"192.168.1.13",
"3":"22",
"4":"tongzhou"
},
{
"0":"192.168.3",
"1":"33",
"2":"192.168.1.14",
"3":"33",
"4":"daxing"
}
]
}
{
"_index":"tables",
"_type":"_doc",
"_id":"1",
"_version":1,
"result":"created",
"_shards":{
"total":2,
"successful":1,
"failed":0
},
"_seq_no":0,
"_primary_term":1
}
search Table data
搜尋所有列
限制只返回Table的元資料資訊
限制只返回命中行的資訊
返回命中行的高亮資訊
post /tables/_search/
{
"from":0,
"size":20,
"_source":{
"excludes":[
"content"
]
},
"query":{
"nested":{
"path":"content",
"query":{
"query_string":{
"fields":[
"content.*"
],
"query":"tongzhou 192.168.1.1"
}
},
"inner_hits":{
"from":0,
"size":2,
"highlight":{
"fields":{
"*":{
}
}
}
}
}
}
}
{
"took":19,
"timed_out":false,
"_shards":{
"total":5,
"successful":5,
"skipped":0,
"failed":0
},
"hits":{
"total":1,
"max_score":0.9808292,
"hits":[
{
"_index":"tables",
"_type":"_doc",
"_id":"1",
"_score":0.9808292,
"_source":{
"creator":"mango",
"name":"table1",
"id":"1"
},
"inner_hits":{
"content":{
"hits":{
"total":2,
"max_score":0.9808292,
"hits":[
{
"_index":"tables",
"_type":"_doc",
"_id":"1",
"_nested":{
"field":"content",
"offset":0
},
"_score":0.9808292,
"_source":{
"0":"192.168.1.1",
"1":"11",
"2":"192.168.1.12",
"3":"11",
"4":"chaoyang"
},
"highlight":{
"content.0":[
"<em>192.168.1.1</em>"
]
}
},
{
"_index":"tables",
"_type":"_doc",
"_id":"1",
"_nested":{
"field":"content",
"offset":1
},
"_score":0.9808292,
"_source":{
"0":"192.168.1.2",
"1":"22",
"2":"192.168.1.13",
"3":"22",
"4":"tongzhou"
},
"highlight":{
"content.4":[
"<em>tongzhou</em>"
]
}
}
]
}
}
}
}
]
}
}