1. 程式人生 > 實用技巧 >elasticsearch通過logstash資料遷移

elasticsearch通過logstash資料遷移

問題描述

需要將自建的 Elasticsearch(es)叢集中的資料遷移到 AWS 上的 es 服務。

問題解決

使用 logstash 對每個索引逐一進行同步,將資料寫入 AWS 上的 es。

env

  • centos7.x es (self-built)
  • aws es

step1: check index && version

curl -s -u xxx:'yyy' 'https://xxxxx:9200'  #view version
curl -s -u xxx:'yyy' 'https://xxxxx:9200/_cat/indices?h=index'  #list index names

step2: logstash

01、openjdk

#aws vpc 內準備一臺虛機
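wget https://download.java.net/java/GA/jdk11/9/GPL/openjdk-11.0.2_linux-x64_bin.tar.gz
mkdir -p /tmp/reindex && tar -xzf openjdk-11.0.2_linux-x64_bin.tar.gz -C /tmp/reindex   #解壓後得到 /tmp/reindex/jdk-11.0.2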

export JAVA_HOME=/tmp/reindex/jdk-11.0.2
export CLASSPATH=.:$JAVA_HOME/lib:$JAVA_HOME/jre/lib
export PATH=$JAVA_HOME/bin:$PATH

wget https://artifacts.elastic.co/downloads/logstash/logstash-7.4.2.tar.gz   #遷移對 logstash 的版本沒有太大的要求,一般與 es 的主版本相同即可(7.x = 7.x)

02、logstash conf
1.tmpl

input {
    elasticsearch {
    hosts => ["xxx:9200"]
    index => "${INDEX}"
    size =>5000
    scroll =>"50m"
    docinfo => true
  }
}

filter {
}

output {
  elasticsearch {
    hosts => ["xxxx-1.es.amazonaws.com:443"]
    ssl => true
    user => "xxx"
    password => "xxxx"
    pool_max => 5000
    pool_max_per_route =>500
    index => "%{[@metadata][_index]}_fix"   #目標索引名為原索引名加上 _fix;若要與原索引同名,去掉 _fix 即可
    document_type => "%{[@metadata][_type]}"
    document_id => "%{[@metadata][_id]}"
    ilm_enabled => false
  }
}

03、根據索引執行遷移資料

export INDEX="ui_click" && envsubst < ./1.tmpl >1.conf
./bin/logstash -f 1.conf  -w 50 -b 5000 -u 120

step3: validate data

get /login/_count   #資料量

get /login/_search  #資料

get /login/_search  #範圍查詢
{
  "size": 10,
  "query":
     {
       "range": {
         "time": {      #field time in login
           "gte": "1591200000000",  #ms
           "lte": "1592409600000"
         }
       }
     }
}

get /login/_search?sort=time:desc&size=1    #依 time 欄位降序排序,只取第一筆(最新的一筆)