【Hbase】hbase通過meta取splitkey預分割槽建表
阿新 • • 發佈:2022-03-10
思路:從頁面抓table ddl,從meta表ENDKEY拿預分割槽key,然後組裝建表語句
1. 獲取表定義table.ddl
1_get_alltable_ddl.sh $MASTER_HOST
#!/bin/bash
# Scrape the table definitions shown on the HBase master web UI into
# ./table.ddl (one table per line); the raw page is kept in ./table.jsp.tmp.
# Usage: 1_get_alltable_ddl.sh <master_host>

if [[ $# -lt 1 || -z "$1" ]]; then
  printf 'usage: %s <master_host>\n' "$0" >&2
  exit 2
fi

# HBase master hostname or IP
MASTER_HOST=$1

# -f makes curl fail on an HTTP error instead of saving the error page.
curl -fsS "http://${MASTER_HOST}:16010/tablesDetailed.jsp" > table.jsp.tmp || exit 1

# Keep the plain <td> cells (cells that start with a link are skipped) and
# strip the surrounding <td>...</td> tags.
grep "<td>" table.jsp.tmp \
  | grep -vE "<td><a" \
  | awk -F'<td>' '{print $2}' \
  | awk -F'</td>' '{print $1}' > table.ddl
執行結果示例:生成的table.ddl檔案內容如下
'test:groupmsg', {CONFIGURATION => {'COMPRESSION_COMPACT' => 'SNAPPY', 'hbase.regionserver.region.split.policy' => 'org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy', 'hbase.hregion.max.filesize' => '10737418240'}}, {NAME => 'f', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'SNAPPY', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'} 'test:trace_log', {TABLE_ATTRIBUTES => {DURABILITY => 'SKIP_WAL', CONFIGURATION => {'COMPRESSION_COMPACT' => 'SNAPPY', 'hbase.regionserver.region.split.policy' => 'org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy', 'hbase.hregion.max.filesize' => '53687091200'}}, {NAME => 'f', BLOOMFILTER => 'ROWCOL', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => '2592000 SECONDS (30 DAYS)', COMPRESSION => 'SNAPPY', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'} 'test:usermsg', {CONFIGURATION => {'COMPRESSION_COMPACT' => 'SNAPPY', 'hbase.regionserver.region.split.policy' => 'org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy', 'hbase.hregion.max.filesize' => '10737418240'}}, {NAME => 'f', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'SNAPPY', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
2. 獲取表元資料table meta
2_get_table_meta.sh ${table_name}
#!/bin/bash
# Save the hbase:meta rows of one table to ./<table_name>.meta.
# Usage: 2_get_table_meta.sh <table_name>

if [[ $# -lt 1 || -z "$1" ]]; then
  printf 'usage: %s <table_name>\n' "$0" >&2
  exit 2
fi

TABLE_NAME=$1
TABLE_META_PATH="${TABLE_NAME}.meta"

# The prefix is "<table_name>," (with the comma) so that only rows whose key
# starts with this exact table name are returned.
hbase shell <<< "scan 'hbase:meta',{FILTER=>\"PrefixFilter('${TABLE_NAME},')\"}" > "${TABLE_META_PATH}"
執行結果示例:生成的${TABLE_NAME}.meta (test:groupmsg.meta)檔案內容如下
HBase Shell; enter 'help<RETURN>' for list of supported commands. Type "exit<RETURN>" to leave the HBase Shell Version 1.2.6.1, rUnknown, Mon Nov 11 08:58:58 UTC 2019 scan 'hbase:meta',{FILTER=>"PrefixFilter('test:groupmsg,')"} ROW COLUMN+CELL test:groupmsg,,1562322491739.1a2bb43902a942a57c24ad5d9b64d3fc. column=info:regioninfo, timestamp=1624508489258, value={ENCODED => 1a2bb43902a942a57c24ad5d9b64d3fc, NAME => 'test:groupmsg,,1562322491739.1a2bb43902a942a57c24ad5d9b64d3fc.', STARTKEY => '', ENDKEY => '40000000'} test:groupmsg,,1562322491739.1a2bb43902a942a57c24ad5d9b64d3fc. column=info:seqnumDuringOpen, timestamp=1624508489258, value=\x00\x00\x00\x00\x00B\x13\x16 test:groupmsg,,1562322491739.1a2bb43902a942a57c24ad5d9b64d3fc. column=info:server, timestamp=1624508489258, value=hbase-rs103.xx.example.com:16020 test:groupmsg,,1562322491739.1a2bb43902a942a57c24ad5d9b64d3fc. column=info:serverstartcode, timestamp=1624508489258, value=1624507850013 test:groupmsg,40000000,1562322491739.764d259a82bfca7b45d8737fa83c5436. column=info:regioninfo, timestamp=1625218758316, value={ENCODED => 764d259a82bfca7b45d8737fa83c5436, NAME => 'test:groupmsg,40000000,1562322491739.764d259a82bfca7b45d8737fa83c5436.', STARTKEY => '40000000', ENDKEY => '80000000'} test:groupmsg,40000000,1562322491739.764d259a82bfca7b45d8737fa83c5436. column=info:seqnumDuringOpen, timestamp=1625218758316, value=\x00\x00\x00\x00\x02q\x0E\xF4 test:groupmsg,40000000,1562322491739.764d259a82bfca7b45d8737fa83c5436. column=info:server, timestamp=1625218758316, value=hbase-rs102.xx.example.com:16020 test:groupmsg,40000000,1562322491739.764d259a82bfca7b45d8737fa83c5436. column=info:serverstartcode, timestamp=1625218758316, value=1625218597990 test:groupmsg,80000000,1562322491739.c365430d306ad7eac771395aa4573ea0. 
column=info:regioninfo, timestamp=1624416270816, value={ENCODED => c365430d306ad7eac771395aa4573ea0, NAME => 'test:groupmsg,80000000,1562322491739.c365430d306ad7eac771395aa4573ea0.', STARTKEY => '80000000', ENDKEY => 'c0000000'} test:groupmsg,80000000,1562322491739.c365430d306ad7eac771395aa4573ea0. column=info:seqnumDuringOpen, timestamp=1624416270816, value=\x00\x00\x00\x00\x02\x05\x02\x19 test:groupmsg,80000000,1562322491739.c365430d306ad7eac771395aa4573ea0. column=info:server, timestamp=1624416270816, value=hbase-rs195.xx.example.com:16020 test:groupmsg,80000000,1562322491739.c365430d306ad7eac771395aa4573ea0. column=info:serverstartcode, timestamp=1624416270816, value=1624416077140 test:groupmsg,c0000000,1562322491739.8a5ca10df6e2e8572cc4f8c1a7256722. column=info:regioninfo, timestamp=1621828288501, value={ENCODED => 8a5ca10df6e2e8572cc4f8c1a7256722, NAME => 'test:groupmsg,c0000000,1562322491739.8a5ca10df6e2e8572cc4f8c1a7256722.', STARTKEY => 'c0000000', ENDKEY => ''} test:groupmsg,c0000000,1562322491739.8a5ca10df6e2e8572cc4f8c1a7256722. column=info:seqnumDuringOpen, timestamp=1621828288501, value=\x00\x00\x00\x00\x01\x96\x93\xEA test:groupmsg,c0000000,1562322491739.8a5ca10df6e2e8572cc4f8c1a7256722. column=info:server, timestamp=1621828288501, value=hbase-rs179.xx.example.com:16020 test:groupmsg,c0000000,1562322491739.8a5ca10df6e2e8572cc4f8c1a7256722. column=info:serverstartcode, timestamp=1621828288501, value=1621828098776 4 row(s) in 0.5810 seconds -- 該表有 4 個region
3. 生成建表語句
生成建表語句,將建表語句輸出到檔案$TABLE_NAME.ddl
如果考慮將regions數量減半,可只取奇數行或偶數行的split key;也可重複取奇數行,讓數量減半再減半。下面指令碼中只取一次,也就是regions數量約變為原來的1/2
3_generate_table_ddl.sh ${table_name}
#!/bin/bash
# Script-wide settings read by general_table_ddl.

# Table to generate the statement for (first command-line argument).
TABLE_NAME="$1"

# Saved hbase:meta scan of that table.
TABLE_META_PATH="${TABLE_NAME}.meta"

# File holding the definitions of all tables.
TABLE_DDL_MAP="table.ddl"

# Filled in by general_table_ddl with the generated statement.
TABLE_DDL_WITH_PRESPLITS=""
#######################################
# Build the HBase shell `create` statement for one table, pre-split on the
# region end keys found in a saved hbase:meta scan, and write it to
# ./<table_name>.ddl.
# Globals:
#   TABLE_DDL_MAP            (read)    file with one table definition per line
#   TABLE_META_PATH          (read)    saved output of the hbase:meta scan
#   TABLE_DDL_WITH_PRESPLITS (written) the generated statement
# Arguments:
#   $1 - table name, matched literally and case-sensitively
#   $2 - optional stride over the region rows (default 2): the end key of
#        every $2-th region, starting with the first, becomes a split key.
#        2 roughly halves the region count, 1 keeps every split key.
# Outputs:
#   A dated debug line followed by the statement on stdout; errors on stderr.
# Returns:
#   0 on success; 1 on bad arguments, unreadable input files, a table that
#   is not in TABLE_DDL_MAP, or a failed write.
#######################################
general_table_ddl() {
  local table_name=$1
  local stride=${2:-2}
  local family_ddl create_stmt split_keys

  if [[ -z "${table_name}" ]]; then
    printf 'general_table_ddl: table name is required\n' >&2
    return 1
  fi
  if ! [[ "${stride}" =~ ^[1-9][0-9]*$ ]]; then
    printf 'general_table_ddl: stride must be a positive integer: %s\n' "${stride}" >&2
    return 1
  fi
  if [[ ! -r "${TABLE_DDL_MAP}" || ! -r "${TABLE_META_PATH}" ]]; then
    printf 'general_table_ddl: cannot read %s or %s\n' \
      "${TABLE_DDL_MAP}" "${TABLE_META_PATH}" >&2
    return 1
  fi

  # Only the last ", {"-separated group of the definition line is kept, so
  # table-level attributes that precede it are not part of the statement.
  family_ddl=$(grep -F -- "'${table_name}'" "${TABLE_DDL_MAP}" \
    | awk -F', {' 'NR == 1 {print $NF}')
  if [[ -z "${family_ddl}" ]]; then
    printf 'general_table_ddl: no definition for %s in %s\n' \
      "${table_name}" "${TABLE_DDL_MAP}" >&2
    return 1
  fi
  create_stmt="create '${table_name}', {${family_ddl}"

  # Pre-split keys: one candidate per regioninfo row (the rows carrying
  # "ENDKEY => "). The empty end key '' is never a valid split point, so it
  # is dropped; a single-region table therefore yields no SPLITS clause.
  # NOTE(review): keys are copied verbatim in single quotes; keys printed
  # with \xNN escapes presumably need double quotes in the shell - confirm.
  split_keys=$(awk -F'ENDKEY => ' -v step="${stride}" -v empty="''" '
    NF < 2 { next }
    {
      region++
      key = $NF
      sub(/[}][ \t\r]*$/, "", key)   # closing brace of the regioninfo value
      if ((region - 1) % step == 0 && key != empty) {
        joined = (joined == "" ? key : joined ", " key)
      }
    }
    END { print joined }
  ' "${TABLE_META_PATH}")

  if [[ -n "${split_keys}" ]]; then
    TABLE_DDL_WITH_PRESPLITS="${create_stmt}, {SPLITS => [${split_keys}]}"
  else
    TABLE_DDL_WITH_PRESPLITS="${create_stmt}"
  fi

  printf '%s DEBUG TABLE DDL ===>\n%s\n' "$(date)" "${TABLE_DDL_WITH_PRESPLITS}"
  printf '%s\n' "${TABLE_DDL_WITH_PRESPLITS}" > "${table_name}.ddl" || return 1
  return 0
}
# Generate the create statement for the table named on the command line.
# Quoted so the name reaches the function as a single, unglobbed argument.
general_table_ddl "${TABLE_NAME}"
執行結果示例:生成的${TABLE_NAME}.ddl (test:groupmsg.ddl)檔案內容如下(此示例為保留全部split key、不減半時的輸出;若只取奇數行,SPLITS中只會有 '40000000' 和 'c0000000')
create 'test:groupmsg', {NAME => 'f', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'SNAPPY', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}, {SPLITS => [ '40000000', '80000000', 'c0000000']}
如果要修改表的一些屬性,可將修改表語句一同寫入檔案,則test:groupmsg.ddl檔案如下:
create 'test:groupmsg', {NAME => 'f', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'SNAPPY', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}, {SPLITS => [ '40000000', '80000000', 'c0000000']}
alter_async 'test:groupmsg', {CONFIGURATION => {'COMPRESSION_COMPACT' => 'SNAPPY','hbase.regionserver.region.split.policy' => 'org.apache.hadoop.hbase.regionserver.DisabledRegionSplitPolicy','hbase.hregion.max.filesize' => '10737418240'}}
4. 建表
讓hbase shell從檔案中讀取命令,以執行建表語句(這些是HBase shell命令,並非sql)
# Check beforehand that the namespace exists, otherwise an error is reported.
hbase shell ./test:groupmsg.ddl