1. 程式人生 > 其它 >【Hbase】hbase通過meta取splitkey預分割槽建表

【Hbase】hbase通過meta取splitkey預分割槽建表

思路:從頁面抓table ddl,從meta表ENDKEY拿預分割槽key,然後組裝建表語句

1. 獲取表定義table.ddl

1_get_alltable_ddl.sh $MASTER_HOST

#!/bin/bash
# Scrape the table definitions shown on the HBase Master page
# tablesDetailed.jsp and write them, one per line, to ./table.ddl.
# Usage: 1_get_alltable_ddl.sh <master_host>

# HBase master hostname or IP
MASTER_HOST=$1
if [ -z "${MASTER_HOST}" ]; then
        echo "usage: $0 <hbase_master_host>" >&2
        exit 1
fi

# -f makes curl fail on HTTP errors instead of saving the error page, which
# would otherwise be parsed as if it were the table list;
# -sS hides the progress meter but still prints error messages.
if ! curl -fsS "http://${MASTER_HOST}:16010/tablesDetailed.jsp" > table.jsp.tmp; then
        echo "ERROR: cannot fetch tablesDetailed.jsp from ${MASTER_HOST}" >&2
        exit 1
fi

# Keep the <td> cells that are not links and strip the surrounding tags.
grep "<td>" table.jsp.tmp | grep -vE "<td><a" | awk -F'<td>' '{print $2}' | awk -F'</td>' '{print $1}' > table.ddl

執行結果示例:生成的table.ddl檔案內容如下

'test:groupmsg', {CONFIGURATION => {'COMPRESSION_COMPACT' => 'SNAPPY', 'hbase.regionserver.region.split.policy' => 'org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy', 'hbase.hregion.max.filesize' => '10737418240'}}, {NAME => 'f', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'SNAPPY', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
'test:trace_log', {TABLE_ATTRIBUTES => {DURABILITY => 'SKIP_WAL', CONFIGURATION => {'COMPRESSION_COMPACT' => 'SNAPPY', 'hbase.regionserver.region.split.policy' => 'org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy', 'hbase.hregion.max.filesize' => '53687091200'}}, {NAME => 'f', BLOOMFILTER => 'ROWCOL', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => '2592000 SECONDS (30 DAYS)', COMPRESSION => 'SNAPPY', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
'test:usermsg', {CONFIGURATION => {'COMPRESSION_COMPACT' => 'SNAPPY', 'hbase.regionserver.region.split.policy' => 'org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy', 'hbase.hregion.max.filesize' => '10737418240'}}, {NAME => 'f', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'SNAPPY', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}

2. 獲取表元資料table meta

2_get_table_meta.sh ${table_name}

#!/bin/bash
# Save the hbase:meta rows of one table to ./<table_name>.meta.
# Usage: 2_get_table_meta.sh <table_name>
TABLE_NAME=$1
if [ -z "${TABLE_NAME}" ]; then
        # Without a name the filter would be PrefixFilter(',') and the
        # output file would be called ".meta".
        echo "usage: $0 <table_name>" >&2
        exit 1
fi
TABLE_META_PATH="${TABLE_NAME}.meta"
# The prefix ends with ',' because meta row keys have the form
# '<table>,<startkey>,<timestamp>.<encoded>.'.
hbase shell <<< "scan 'hbase:meta',{FILTER=>\"PrefixFilter('${TABLE_NAME},')\"}" > "${TABLE_META_PATH}"

執行結果示例:生成的${TABLE_NAME}.meta (test:groupmsg.meta)檔案內容如下

HBase Shell; enter 'help<RETURN>' for list of supported commands.
Type "exit<RETURN>" to leave the HBase Shell
Version 1.2.6.1, rUnknown, Mon Nov 11 08:58:58 UTC 2019

scan 'hbase:meta',{FILTER=>"PrefixFilter('test:groupmsg,')"}
ROW  COLUMN+CELL
 test:groupmsg,,1562322491739.1a2bb43902a942a57c24ad5d9b64d3fc. column=info:regioninfo, timestamp=1624508489258, value={ENCODED => 1a2bb43902a942a57c24ad5d9b64d3fc, NAME => 'test:groupmsg,,1562322491739.1a2bb43902a942a57c24ad5d9b64d3fc.', STARTKEY => '', ENDKEY => '40000000'}
 test:groupmsg,,1562322491739.1a2bb43902a942a57c24ad5d9b64d3fc. column=info:seqnumDuringOpen, timestamp=1624508489258, value=\x00\x00\x00\x00\x00B\x13\x16
 test:groupmsg,,1562322491739.1a2bb43902a942a57c24ad5d9b64d3fc. column=info:server, timestamp=1624508489258, value=hbase-rs103.xx.example.com:16020
 test:groupmsg,,1562322491739.1a2bb43902a942a57c24ad5d9b64d3fc. column=info:serverstartcode, timestamp=1624508489258, value=1624507850013
 test:groupmsg,40000000,1562322491739.764d259a82bfca7b45d8737fa83c5436. column=info:regioninfo, timestamp=1625218758316, value={ENCODED => 764d259a82bfca7b45d8737fa83c5436, NAME => 'test:groupmsg,40000000,1562322491739.764d259a82bfca7b45d8737fa83c5436.', STARTKEY => '40000000', ENDKEY => '80000000'}
 test:groupmsg,40000000,1562322491739.764d259a82bfca7b45d8737fa83c5436. column=info:seqnumDuringOpen, timestamp=1625218758316, value=\x00\x00\x00\x00\x02q\x0E\xF4
 test:groupmsg,40000000,1562322491739.764d259a82bfca7b45d8737fa83c5436. column=info:server, timestamp=1625218758316, value=hbase-rs102.xx.example.com:16020
 test:groupmsg,40000000,1562322491739.764d259a82bfca7b45d8737fa83c5436. column=info:serverstartcode, timestamp=1625218758316, value=1625218597990
 test:groupmsg,80000000,1562322491739.c365430d306ad7eac771395aa4573ea0. column=info:regioninfo, timestamp=1624416270816, value={ENCODED => c365430d306ad7eac771395aa4573ea0, NAME => 'test:groupmsg,80000000,1562322491739.c365430d306ad7eac771395aa4573ea0.', STARTKEY => '80000000', ENDKEY => 'c0000000'}
 test:groupmsg,80000000,1562322491739.c365430d306ad7eac771395aa4573ea0. column=info:seqnumDuringOpen, timestamp=1624416270816, value=\x00\x00\x00\x00\x02\x05\x02\x19
 test:groupmsg,80000000,1562322491739.c365430d306ad7eac771395aa4573ea0. column=info:server, timestamp=1624416270816, value=hbase-rs195.xx.example.com:16020
 test:groupmsg,80000000,1562322491739.c365430d306ad7eac771395aa4573ea0. column=info:serverstartcode, timestamp=1624416270816, value=1624416077140
 test:groupmsg,c0000000,1562322491739.8a5ca10df6e2e8572cc4f8c1a7256722. column=info:regioninfo, timestamp=1621828288501, value={ENCODED => 8a5ca10df6e2e8572cc4f8c1a7256722, NAME => 'test:groupmsg,c0000000,1562322491739.8a5ca10df6e2e8572cc4f8c1a7256722.', STARTKEY => 'c0000000', ENDKEY => ''}
 test:groupmsg,c0000000,1562322491739.8a5ca10df6e2e8572cc4f8c1a7256722. column=info:seqnumDuringOpen, timestamp=1621828288501, value=\x00\x00\x00\x00\x01\x96\x93\xEA
 test:groupmsg,c0000000,1562322491739.8a5ca10df6e2e8572cc4f8c1a7256722. column=info:server, timestamp=1621828288501, value=hbase-rs179.xx.example.com:16020
 test:groupmsg,c0000000,1562322491739.8a5ca10df6e2e8572cc4f8c1a7256722. column=info:serverstartcode, timestamp=1621828288501, value=1621828098776
4 row(s) in 0.5810 seconds
-- 該表有 4 個region

3. 生成建表語句

生成建表語句,將建表語句輸出到檔案$TABLE_NAME.ddl
如果考慮將regions數量減半,可通過只取奇數行或偶數行key, 也可多次取奇數減半再減半;下面指令碼中只取一次奇數行,也就是region數量約減為原來的1/2
3_generate_table_ddl.sh ${table_name}

#!/bin/bash
# Usage: 3_generate_table_ddl.sh <table_name>

# Table whose create statement is generated (first script argument).
TABLE_NAME="$1"
# File holding the table definitions, one "'<table>', {...}" line per table.
TABLE_DDL_MAP='table.ddl'
# Saved hbase:meta scan of that table.
TABLE_META_PATH="${TABLE_NAME}.meta"
# Receives the generated statement; set by general_table_ddl.
TABLE_DDL_WITH_PRESPLITS=''

#######################################
# Build the `create` statement for one table, pre-split on the ENDKEY values
# found in a saved hbase:meta scan, and write it to ./<table_name>.ddl.
# Only the column-family descriptors ({NAME => ...}) are taken from the
# table definition; table-level attributes are not copied.
# Globals:
#   TABLE_DDL_MAP            (read)    file with one "'<table>', {...}" line per table
#   TABLE_META_PATH          (read)    saved output of the hbase:meta scan
#   TABLE_DDL_WITH_PRESPLITS (written) the generated statement
# Arguments:
#   $1 - table name (matched exactly, case-sensitively)
#   $2 - optional, which ENDKEY rows become split keys:
#          odd  (default) rows 1,3,5,...  e.g. 4 regions -> 3 regions
#          even           rows 2,4,6,...  e.g. 4 regions -> 2 regions
#          all            every row       same regions as the source table
# Outputs:
#   A debug line plus the statement on stdout; the statement in <table>.ddl.
# Returns:
#   0 on success, 1 on bad arguments or missing input.
#######################################
function general_table_ddl {
        local TABLE_NAME=$1
        local SPLIT_MODE=${2:-odd}
        local DDL TABLE_DDL PRE_SPLITS_STRING

        if [ -z "${TABLE_NAME}" ]; then
                echo "usage: general_table_ddl <table_name> [odd|even|all]" >&2
                return 1
        fi
        case "${SPLIT_MODE}" in
                odd|even|all) ;;
                *)
                        echo "ERROR: unknown split mode '${SPLIT_MODE}' (expected odd, even or all)" >&2
                        return 1
                        ;;
        esac
        if [ ! -r "${TABLE_DDL_MAP}" ] || [ ! -r "${TABLE_META_PATH}" ]; then
                echo "ERROR: cannot read '${TABLE_DDL_MAP}' or '${TABLE_META_PATH}'" >&2
                return 1
        fi

        # Take the line that starts with exactly '<table>', and keep everything
        # from the first column-family descriptor on, so that tables with
        # several families keep all of them.
        DDL=$(awk -v name="${TABLE_NAME}" '
                BEGIN { prefix = "\047" name "\047, " }
                index($0, prefix) == 1 {
                        pos = index($0, "{NAME => ")
                        if (pos > 0) {
                                print substr($0, pos)
                                exit
                        }
                }
        ' "${TABLE_DDL_MAP}")
        if [ -z "${DDL}" ]; then
                echo "ERROR: no definition for '${TABLE_NAME}' found in ${TABLE_DDL_MAP}" >&2
                return 1
        fi
        TABLE_DDL="create '${TABLE_NAME}', ${DDL}"

        # Pre-splits: pull the ENDKEY of every regioninfo row, pick rows by
        # position according to SPLIT_MODE, and drop the empty end key of the
        # last region (an empty key is not a split point).
        # NOTE(review): keys are emitted in single quotes exactly as printed by
        # the scan; keys containing a single quote or \xNN escapes presumably
        # need different quoting - verify before use.
        PRE_SPLITS_STRING=$(sed -n "s/.*ENDKEY => '\(.*\)'}[[:space:]]*\$/\1/p" "${TABLE_META_PATH}" |
                awk -v mode="${SPLIT_MODE}" '
                        $0 == "" { next }
                        mode == "all" || (mode == "odd" && NR % 2 == 1) || (mode == "even" && NR % 2 == 0) {
                                printf "%s\047%s\047", sep, $0
                                sep = ", "
                        }
                ')

        if [ -n "${PRE_SPLITS_STRING}" ]; then
                TABLE_DDL_WITH_PRESPLITS="${TABLE_DDL}, {SPLITS => [${PRE_SPLITS_STRING}]}"
        else
                # Single-region table (or no key selected): plain create.
                TABLE_DDL_WITH_PRESPLITS="${TABLE_DDL}"
        fi
        # printf with quoted arguments keeps the statement byte-for-byte:
        # no globbing, no word-splitting, no backslash interpretation.
        printf '%s DEBUG TABLE DDL ===>\n%s\n' "$(date)" "${TABLE_DDL_WITH_PRESPLITS}"
        printf '%s\n' "${TABLE_DDL_WITH_PRESPLITS}" > "${TABLE_NAME}.ddl"
        return 0
}

# Generate the create statement; the name is quoted so it always reaches the
# function as exactly one argument.
general_table_ddl "${TABLE_NAME}"

執行結果示例:生成的${TABLE_NAME}.ddl (test:groupmsg.ddl)檔案內容如下(此示例為按全部split key分割槽的結果;若只取奇數行,則只保留'40000000'、'c0000000'兩個split key)

create 'test:groupmsg', {NAME => 'f', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'SNAPPY', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}, {SPLITS => [ '40000000', '80000000', 'c0000000']}

如果要修改表的一些屬性,可將修改表語句一同寫入檔案,則test:groupmsg.ddl檔案如下:

create 'test:groupmsg', {NAME => 'f', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'SNAPPY', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}, {SPLITS => [ '40000000', '80000000', 'c0000000']}
alter_async 'test:groupmsg', {CONFIGURATION => {'COMPRESSION_COMPACT' => 'SNAPPY','hbase.regionserver.region.split.policy' => 'org.apache.hadoop.hbase.regionserver.DisabledRegionSplitPolicy','hbase.hregion.max.filesize' => '10737418240'}}

4. 建表

通過從檔案中讀取命令的方式執行建表語句(hbase shell命令)

# Make sure the namespace exists beforehand, otherwise an error is reported
hbase shell ./test:groupmsg.ddl