建立clickhouse 分散式表方式 及 分散式表資料增加、更新、刪除、查詢
技術標籤:clickhouse 列式儲存大資料clickhouse
建立clickhouse 分散式表方式 及分散式表資料增加、更新、刪除、查詢
1、建表語句
(1)、本地表:分別在每個節點都建立本地表,或者使用 on cluster【在其中一個節點執行即可】
-- Local replicated table that physically stores each shard's rows.
-- Qualified with test_db because the Distributed engine clauses in this walkthrough
-- reference the local table as (test_db, city_local).
-- {shard} and {replica} are placeholders filled in from each server's configuration.
CREATE TABLE test_db.city_local ON CLUSTER ck_cluster_name
(
    `fdate` Int64,
    `city_code` Int32,
    `city_name` String,
    `total_cnt` Int64
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/city_local', '{replica}')
PARTITION BY fdate
ORDER BY (fdate, city_code, city_name)
SETTINGS index_granularity = 8192, storage_policy = 'ssd_to_hdd';
說明:{shard}、{replica} 這類 {} 佔位符是 ClickHouse 的巨集(macros),其值取自各節點配置檔案中 <macros> 區段對應的變數。
(2)、分散式表,本身不儲存資料,詳細瞭解請閱讀官方文件。在其中一個節點執行即可。
分散式表一般加 _all 字尾。Distributed 引擎的第一個引數是叢集名稱,第二個引數是資料庫名稱,第三個引數是對應的本地表,第四個引數是分片鍵(sharding key),決定每筆資料寫入哪個分片;例如使用 rand() 即表示隨機分佈資料。
-- 推薦使用方式一,因為這樣只需要重新建立一個all與對應local表的對映關係即可
-- 方式一:
-- 示例1:
-- Column definitions are copied from test_db.city_local; fdate is passed as the sharding key.
CREATE TABLE IF NOT EXISTS test_db.city_all
ON CLUSTER ck_cluster_name
AS test_db.city_local
ENGINE = Distributed(ck_cluster_name, test_db, city_local, fdate);
-- 示例2:
-- Column definitions are copied from test_db.city_local; rand() is passed as the sharding key.
CREATE TABLE IF NOT EXISTS test_db.city_all
ON CLUSTER ck_cluster_name
AS test_db.city_local
ENGINE = Distributed(
    ck_cluster_name,  -- cluster
    test_db,          -- database holding the local table
    city_local,       -- local table
    rand()            -- sharding key
);
-- 方式二:
-- Distributed table declared with an explicit column list instead of AS <local table>.
-- The column list has to be kept in line with city_local by hand.
-- Qualified with test_db so it is the same table as test_db.city_all created by the AS-based variant.
CREATE TABLE test_db.city_all ON CLUSTER ck_cluster_name
(
    `fdate` Int64,
    `city_code` Int32,
    `city_name` String,
    `total_cnt` Int64
)
ENGINE = Distributed(ck_cluster_name, test_db, city_local, rand());
2、分散式表插入資料
-- Four separate single-row inserts written through the distributed table.
INSERT INTO city_all (fdate, city_code, city_name, total_cnt)
VALUES (20210131, 4000, 'guangzhou', 420000);

INSERT INTO city_all (fdate, city_code, city_name, total_cnt)
VALUES (20210131, 5000, 'shenzhan', 55000);

INSERT INTO city_all (fdate, city_code, city_name, total_cnt)
VALUES (20210131, 6000, 'huizhou', 65000);

INSERT INTO city_all (fdate, city_code, city_name, total_cnt)
VALUES (20210131, 7000, 'huizhou', 75000);
3、分散式表查詢資料
-- 插入資料後,查詢資料
select * from city_all;
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┐
│ 20210131 │5000 │ shenzhan│55000 │
└──────────┴───────────┴───────────┴───────────┘
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┐
│ 20210131 │7000 │ huizhou│75000 │
└──────────┴───────────┴───────────┴───────────┘
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┐
│ 20210131 │6000 │ huizhou│65000 │
└──────────┴───────────┴───────────┴───────────┘
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┐
│ 20210131 │4000 │ guangzhou │420000 │
└──────────┴───────────┴───────────┴───────────┘
4、分散式表更新資料
-- 更新資料【生效】,通過local表來更新 同時指定上叢集名稱;如果通過all來更新則不支援會報錯
-- The update is issued against the local table, with ON CLUSTER naming the cluster.
ALTER TABLE city_local
    ON CLUSTER ck_cluster_name
    UPDATE total_cnt = 2222
    WHERE city_name = 'huizhou';
-- 通過all來更新會報錯,An error occurred before execution: Code: 371, e.displayText() = DB::Exception: Table 'city_all' isn't replicated, but shard #3 is replicated according to its cluster definition (version 20.2.1.2183 (official build))
-- ALTER TABLE city_all ON CLUSTER ck_cluster_name UPDATE total_cnt = 3333 WHERE city_name = 'huizhou';
SELECT * FROM city_all;
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┐
│ 20210131 │4000 │ guangzhou │420000 │
└──────────┴───────────┴───────────┴───────────┘
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┐
│ 20210131 │6000 │ huizhou│2222 │
└──────────┴───────────┴───────────┴───────────┘
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┐
│ 20210131 │7000 │ huizhou│2222 │
└──────────┴───────────┴───────────┴───────────┘
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┐
│ 20210131 │5000 │ shenzhan│55000 │
└──────────┴───────────┴───────────┴───────────┘
5、分散式表刪除資料
-- 刪除資料【生效】,通過local表來刪除 同時指定上叢集名稱;如果通過all來刪除則不支援會報錯
-- The delete is issued against the local table, with ON CLUSTER naming the cluster.
ALTER TABLE city_local
    ON CLUSTER ck_cluster_name
    DELETE
    WHERE city_name = 'huizhou';
-- An error occurred before execution: Code: 371, e.displayText() = DB::Exception: Table 'city_all' isn't replicated, but shard #3 is replicated according to its cluster definition (version 20.2.1.2183 (official build))
-- ALTER TABLE city_all ON CLUSTER ck_cluster_name DELETE WHERE city_name = 'huizhou';
SELECT * FROM city_all;
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┐
│ 20210131 │5000 │ shenzhan│55000 │
└──────────┴───────────┴───────────┴───────────┘
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┐
│ 20210131 │4000 │ guangzhou │420000 │
└──────────┴───────────┴───────────┴───────────┘
-- 刪除資料後,再次查詢資料
-- Re-check the remaining rows through the distributed table.
SELECT * FROM city_all;
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┐
│ 20210131 │5000 │ shenzhan│55000 │
└──────────┴───────────┴───────────┴───────────┘
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┐
│ 20210131 │4000 │ guangzhou │420000 │
└──────────┴───────────┴───────────┴───────────┘
6、分散式表新增欄位
-- 新增歷史欄位
-- mysql 新增欄位
-- ALTER TABLE test_table add age int DEFAULT NULL COMMENT '年齡' after name;
-- 生效
-- Adds the history column to the local table; no position is given.
ALTER TABLE city_local
    ON CLUSTER ck_cluster_name
    ADD COLUMN history Int32;
-- 同時支援指定在某個欄位後面新增欄位(與上一條語句二選一;若 history 欄位已存在,再次執行會報錯)
-- Adds the history column to the local table, positioned right after city_code.
ALTER TABLE city_local
    ON CLUSTER ck_cluster_name
    ADD COLUMN history Int32 AFTER city_code;
-- 不生效,報錯:An error occurred before execution: Code: 371, e.displayText() = DB::Exception: Table 'city_all' isn't replicated,
-- but shard #3 is replicated according to its cluster definition (version 20.2.1.2183 (official build)).
-- Kept for reference only: this walkthrough reports that the statement fails with Code 371,
-- so it is commented out like the other failing ALTER examples on city_all.
-- alter table city_all ON CLUSTER ck_cluster_name add column history Int32;
-- 嘗試將city_all 刪除重建
-- Drops the distributed table on the cluster so it can be recreated.
DROP TABLE test_db.city_all
    ON CLUSTER ck_cluster_name;
-- 重建all表,方式一:【可行】
-- Recreates the distributed table with its columns copied from test_db.city_local.
-- The first engine argument must be the cluster used throughout this walkthrough (ck_cluster_name).
CREATE TABLE IF NOT EXISTS test_db.city_all ON CLUSTER ck_cluster_name AS test_db.city_local
ENGINE = Distributed(ck_cluster_name, test_db, city_local, rand());
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐
│ 20210131 │5000 │ shenzhan│55000 │0 │
└──────────┴───────────┴───────────┴───────────┴─────────┘
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐
│ 20210131 │4000 │ guangzhou │420000 │0 │
└──────────┴───────────┴───────────┴───────────┴─────────┘
-- 重建all表,方式二:【可行】
-- Recreates the distributed table with an explicit column list that includes history.
-- Qualified with test_db so it is the same table that was dropped as test_db.city_all.
CREATE TABLE test_db.city_all ON CLUSTER ck_cluster_name (
`fdate` Int64,
`city_code` Int32,
`city_name` String,
`total_cnt` Int64,
`history` Int32
) ENGINE = Distributed(ck_cluster_name, test_db, city_local, rand());
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐
│ 20210131 │5000 │ shenzhan│55000 │0 │
└──────────┴───────────┴───────────┴───────────┴─────────┘
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐
│ 20210131 │4000 │ guangzhou │420000 │0 │
└──────────┴───────────┴───────────┴───────────┴─────────┘
-- 查詢表的建立語句
show create table city_local;
show create table city_all;
-- 新增欄位後,查詢資料
select * from city_all;
7、分散式表新增欄位後,插入資料驗證
-- 新增欄位後,插入資料,並不對新欄位賦值,驗證可行。新增history欄位預設為0
-- history is left out of the column list on purpose.
INSERT INTO city_all (fdate, city_code, city_name, total_cnt)
VALUES (20210131, 7000, 'foshan', 75000);
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐
│ 20210131 │7000 │ foshan│75000 │0 │
└──────────┴───────────┴───────────┴───────────┴─────────┘
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐
│ 20210131 │4000 │ guangzhou │420000 │0 │
└──────────┴───────────┴───────────┴───────────┴─────────┘
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐
│ 20210131 │5000 │ shenzhan│55000 │0 │
└──────────┴───────────┴───────────┴───────────┴─────────┘
-- 新增欄位後,插入資料,對新欄位賦值
-- history is given an explicit value here.
INSERT INTO city_all (fdate, city_code, city_name, total_cnt, history)
VALUES (20210131, 7000, 'dongguan', 85000, 2021);
-- 新增欄位後,查詢資料
select * from city_all;
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐
│ 20210131 │4000 │ guangzhou │420000 │0 │
└──────────┴───────────┴───────────┴───────────┴─────────┘
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐
│ 20210131 │7000 │ dongguan│85000 │2021 │
└──────────┴───────────┴───────────┴───────────┴─────────┘
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐
│ 20210131 │5000 │ shenzhan│55000 │0 │
└──────────┴───────────┴───────────┴───────────┴─────────┘
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐
│ 20210131 │7000 │ foshan│75000 │0 │
└──────────┴───────────┴───────────┴───────────┴─────────┘
-- 對分散式all表更新操作【不生效】An error occurred before execution: Code: 371, e.displayText() = DB::Exception: Table 'city_all' isn't replicated, but shard #1 is replicated according to its cluster definition (version 20.2.1.2183 (official build))
-- Kept for reference only: this walkthrough reports that the statement fails with Code 371,
-- so it is commented out like the earlier failing UPDATE example on city_all.
-- ALTER TABLE city_all ON CLUSTER ck_cluster_name UPDATE total_cnt = 3333 WHERE city_name = 'foshan';
-- 對分散式all表刪除資料【不生效】An error occurred before execution: Code: 371, e.displayText() = DB::Exception: Table 'city_all' isn't replicated, but shard #1 is replicated according to its cluster definition (version 20.2.1.2183 (official build))
-- Kept for reference only: this walkthrough reports that the statement fails with Code 371,
-- so it is commented out like the earlier failing DELETE example on city_all.
-- ALTER TABLE city_all ON CLUSTER ck_cluster_name DELETE WHERE city_name = 'foshan';
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐
│ 20210131 │5000 │ shenzhan│55000 │0 │
└──────────┴───────────┴───────────┴───────────┴─────────┘
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐
│ 20210131 │7000 │ dongguan│85000 │2021 │
└──────────┴───────────┴───────────┴───────────┴─────────┘
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐
│ 20210131 │7000 │ foshan│75000 │0 │
└──────────┴───────────┴───────────┴───────────┴─────────┘
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐
│ 20210131 │4000 │ guangzhou │420000 │0 │
└──────────┴───────────┴───────────┴───────────┴─────────┘
-- 刪除分散式表【生效】
-- Clean-up: drop the local table, then the distributed table, on the cluster.
DROP TABLE city_local
    ON CLUSTER ck_cluster_name;
DROP TABLE city_all
    ON CLUSTER ck_cluster_name;
文章最後,給大家推薦一些受歡迎的技術部落格連結:
- JAVA相關的深度技術部落格連結
- Flink 相關技術部落格連結
- Spark核心技術連結
- 設計模式 —— 深度技術部落格連結
- 機器學習 —— 深度技術部落格連結
- Hadoop相關技術部落格連結
- 超全乾貨--Flink思維導圖,花了3周左右編寫、校對
- 深入JAVA 的JVM核心原理解決線上各種故障【附案例】
- 請談談你對volatile的理解?--最近小李子與面試官的一場“硬核較量”
- 聊聊RPC通訊,經常被問到的一道面試題。原始碼+筆記,包懂
- 深入聊聊Java 垃圾回收機制【附原理圖及調優方法】
歡迎掃描下方的二維碼或 搜尋 公眾號“大資料高階架構師”,我們會有更多、且及時的資料推送給您,歡迎多多交流!