1. 程式人生 > 其它 >建立clickhouse 分散式表方式 及 分散式表資料增加、更新、刪除、查詢

建立clickhouse 分散式表方式 及 分散式表資料增加、更新、刪除、查詢

技術標籤:clickhouse 列式儲存大資料clickhouse

建立clickhouse 分散式表方式 及分散式表資料增加、更新、刪除、查詢

1、建表語句

(1)、本地表:分別在每個節點都建立本地表,或者使用 on cluster【在其中一個節點執行即可】

-- Local replicated table that physically stores the rows.
-- ON CLUSTER propagates the DDL to every node of ck_cluster_name, so it only
-- needs to be executed on one node.
-- The name is qualified with test_db because the Distributed tables in this
-- article are declared over test_db.city_local.
CREATE TABLE IF NOT EXISTS test_db.city_local ON CLUSTER ck_cluster_name
(
    `fdate`     Int64,  -- partition key; the sample rows use values such as 20210131
    `city_code` Int32,
    `city_name` String,
    `total_cnt` Int64
)
-- {shard} and {replica} are placeholders filled in from each node's configuration.
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/city_local', '{replica}')
PARTITION BY fdate
ORDER BY (fdate, city_code, city_name)
-- NOTE(review): presumably the 'ssd_to_hdd' policy must exist in the server's storage configuration -- confirm.
SETTINGS index_granularity = 8192, storage_policy = 'ssd_to_hdd';

說明:{shard}、{replica} 這類 {} 裡的內容是巨集(macros),建表時會被替換成各節點配置檔案 macros 區段中對應的變數值。

(2)、分散式表,本身不儲存資料,詳細瞭解請閱讀官方文件。在其中一個節點執行即可。

分散式表一般加_all,第一個引數是叢集名稱,第二個引數是資料庫名稱,第三個引數是對應的本地表,第四個引數是分片鍵(sharding key),用來決定每筆資料寫入哪個分片:可以是某個欄位(如示例1的 fdate),也可以用 rand() 讓資料隨機分佈(如示例2)。

-- 推薦使用方式一,因為這樣只需要重新建立一個all與對應local表的對映關係即可

-- 方式一:

-- 示例1:

-- Example 1: the distributed table copies its column list from
-- test_db.city_local and uses the fdate column as the sharding key.
CREATE TABLE IF NOT EXISTS test_db.city_all
    ON CLUSTER ck_cluster_name
    AS test_db.city_local
ENGINE = Distributed(
    ck_cluster_name,  -- cluster name
    test_db,          -- database of the underlying table
    city_local,       -- underlying local table
    fdate             -- sharding key
);

-- 示例2:

-- Example 2: same table name as example 1, but rand() is the sharding key.
-- Because of IF NOT EXISTS this is skipped when test_db.city_all already
-- exists, so treat the two examples as alternatives.
CREATE TABLE IF NOT EXISTS test_db.city_all
    ON CLUSTER ck_cluster_name
    AS test_db.city_local
ENGINE = Distributed(
    ck_cluster_name,  -- cluster name
    test_db,          -- database of the underlying table
    city_local,       -- underlying local table
    rand()            -- sharding key
);

-- 方式二:

-- Way 2: declare the columns explicitly instead of copying them with AS.
-- The column list has to be kept in step with test_db.city_local by hand.
-- Qualified with test_db (as in way 1) and terminated with a semicolon so the
-- statement can be followed by others in the same script.
CREATE TABLE IF NOT EXISTS test_db.city_all ON CLUSTER ck_cluster_name
(
    `fdate`     Int64,
    `city_code` Int32,
    `city_name` String,
    `total_cnt` Int64
)
ENGINE = Distributed(ck_cluster_name, test_db, city_local, rand());

2、分散式表插入資料

-- Seed four sample rows through the distributed table, one INSERT per row.
INSERT INTO city_all (fdate, city_code, city_name, total_cnt)
VALUES (20210131, 4000, 'guangzhou', 420000);

INSERT INTO city_all (fdate, city_code, city_name, total_cnt)
VALUES (20210131, 5000, 'shenzhan', 55000);

INSERT INTO city_all (fdate, city_code, city_name, total_cnt)
VALUES (20210131, 6000, 'huizhou', 65000);

INSERT INTO city_all (fdate, city_code, city_name, total_cnt)
VALUES (20210131, 7000, 'huizhou', 75000);

3、分散式表查詢資料

-- 插入資料後,查詢資料

-- Read every column of every row through the distributed table.
SELECT
    fdate,
    city_code,
    city_name,
    total_cnt
FROM city_all;

┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┐

│ 20210131 │5000 │ shenzhan│55000 │

└──────────┴───────────┴───────────┴───────────┘

┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┐

│ 20210131 │7000 │ huizhou│75000 │

└──────────┴───────────┴───────────┴───────────┘

┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┐

│ 20210131 │6000 │ huizhou│65000 │

└──────────┴───────────┴───────────┴───────────┘

┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┐

│ 20210131 │4000 │ guangzhou │420000 │

└──────────┴───────────┴───────────┴───────────┘

4、分散式表更新資料

-- 更新資料【生效】,通過local表來更新 同時指定上叢集名稱;如果通過all來更新則不支援會報錯

-- The UPDATE mutation targets the local table and is sent to the whole cluster.
ALTER TABLE city_local
    ON CLUSTER ck_cluster_name
    UPDATE total_cnt = 2222
    WHERE city_name = 'huizhou';

-- 通過all來更新會報錯,An error occurred before execution: Code: 371, e.displayText() = DB::Exception: Table 'city_all' isn't replicated, but shard #3 is replicated according to its cluster definition (version 20.2.1.2183 (official build))

-- ALTER TABLE city_all ON CLUSTER ck_cluster_name UPDATE total_cnt = 3333 WHERE city_name = 'huizhou';

-- Check the result of the update through the distributed table.
SELECT
    fdate,
    city_code,
    city_name,
    total_cnt
FROM city_all;

┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┐

│ 20210131 │4000 │ guangzhou │420000 │

└──────────┴───────────┴───────────┴───────────┘

┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┐

│ 20210131 │6000 │ huizhou│2222 │

└──────────┴───────────┴───────────┴───────────┘

┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┐

│ 20210131 │7000 │ huizhou│2222 │

└──────────┴───────────┴───────────┴───────────┘

┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┐

│ 20210131 │5000 │ shenzhan│55000 │

└──────────┴───────────┴───────────┴───────────┘

5、分散式表刪除資料

-- 刪除資料【生效】,通過local表來刪除 同時指定上叢集名稱;如果通過all來刪除則不支援會報錯

-- The DELETE mutation targets the local table and is sent to the whole cluster.
ALTER TABLE city_local
    ON CLUSTER ck_cluster_name
    DELETE
    WHERE city_name = 'huizhou';

-- An error occurred before execution: Code: 371, e.displayText() = DB::Exception: Table 'city_all' isn't replicated, but shard #3 is replicated according to its cluster definition (version 20.2.1.2183 (official build))

-- ALTER TABLE city_all ON CLUSTER ck_cluster_name DELETE WHERE city_name = 'huizhou';

-- Check the result of the delete through the distributed table.
SELECT
    fdate,
    city_code,
    city_name,
    total_cnt
FROM city_all;

┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┐

│ 20210131 │5000 │ shenzhan│55000 │

└──────────┴───────────┴───────────┴───────────┘

┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┐

│ 20210131 │4000 │ guangzhou │420000 │

└──────────┴───────────┴───────────┴───────────┘

-- Query the data again after the delete (no insert happened in this step).

SELECT * FROM city_all;

┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┐

│ 20210131 │5000 │ shenzhan│55000 │

└──────────┴───────────┴───────────┴───────────┘

┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┐

│ 20210131 │4000 │ guangzhou │420000 │

└──────────┴───────────┴───────────┴───────────┘

6、分散式表新增欄位

-- 新增歷史欄位

-- mysql 新增欄位

-- ALTER TABLE test_table add age int DEFAULT NULL COMMENT '年齡' after name;

-- 生效

-- Add the new column on the local table of every node (this works).
ALTER TABLE city_local ON CLUSTER ck_cluster_name
    ADD COLUMN IF NOT EXISTS history Int32;

-- Alternative form: AFTER places the new column behind a given column.
-- IF NOT EXISTS turns this into a no-op when the statement above has already
-- added `history`; without it the second ADD COLUMN fails because the column
-- already exists. Run only one of the two to choose the column position.
ALTER TABLE city_local ON CLUSTER ck_cluster_name
    ADD COLUMN IF NOT EXISTS history Int32 AFTER city_code;

-- Does NOT work on the distributed table; it fails with:
--   An error occurred before execution: Code: 371, e.displayText() = DB::Exception: Table 'city_all' isn't replicated,
--   but shard #3 is replicated according to its cluster definition (version 20.2.1.2183 (official build)).
-- Kept commented out, like the failing UPDATE/DELETE examples on city_all,
-- so that the script can be run from top to bottom.

-- ALTER TABLE city_all ON CLUSTER ck_cluster_name ADD COLUMN history Int32;

-- 嘗試將city_all 刪除重建

-- Remove the distributed table on every node so that it can be recreated.
DROP TABLE test_db.city_all
    ON CLUSTER ck_cluster_name;

-- 重建all表,方式一:【可行】

-- Recreate the distributed table from the current definition of
-- test_db.city_local. The cluster argument must be ck_cluster_name, the same
-- cluster used by ON CLUSTER and by every other statement in this article.
CREATE TABLE IF NOT EXISTS test_db.city_all ON CLUSTER ck_cluster_name AS test_db.city_local
ENGINE = Distributed(ck_cluster_name, test_db, city_local, rand());
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐

│ 20210131 │5000 │ shenzhan│55000 │0 │

└──────────┴───────────┴───────────┴───────────┴─────────┘

┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐

│ 20210131 │4000 │ guangzhou │420000 │0 │

└──────────┴───────────┴───────────┴───────────┴─────────┘

-- 重建all表,方式二:【可行】

-- Recreate the distributed table with an explicit column list that includes
-- the new `history` column.
-- Qualified with test_db to match the DROP / way-1 statements, and guarded
-- with IF NOT EXISTS because way 1 creates the same table: drop
-- test_db.city_all first if this variant is the one you want to apply.
CREATE TABLE IF NOT EXISTS test_db.city_all ON CLUSTER ck_cluster_name
(
    `fdate`     Int64,
    `city_code` Int32,
    `city_name` String,
    `total_cnt` Int64,
    `history`   Int32
)
ENGINE = Distributed(ck_cluster_name, test_db, city_local, rand());
┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐

│ 20210131 │5000 │ shenzhan│55000 │0 │

└──────────┴───────────┴───────────┴───────────┴─────────┘

┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐

│ 20210131 │4000 │ guangzhou │420000 │0 │

└──────────┴───────────┴───────────┴───────────┴─────────┘

-- 查詢表的建立語句

-- Print the DDL of the local table, then of the distributed table.
SHOW CREATE TABLE city_local;

SHOW CREATE TABLE city_all;

-- 新增欄位後,查詢資料

-- Read all columns, including the newly added one, through the distributed table.
SELECT *
FROM city_all;

7、分散式表新增欄位後,插入資料驗證

-- 新增欄位後,插入資料,並不對新欄位賦值,驗證可行。新增history欄位預設為0

-- Insert a row without supplying a value for the new `history` column.
INSERT INTO city_all (fdate, city_code, city_name, total_cnt)
VALUES (20210131, 7000, 'foshan', 75000);

┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐

│ 20210131 │7000 │ foshan│75000 │0 │

└──────────┴───────────┴───────────┴───────────┴─────────┘

┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐

│ 20210131 │4000 │ guangzhou │420000 │0 │

└──────────┴───────────┴───────────┴───────────┴─────────┘

┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐

│ 20210131 │5000 │ shenzhan│55000 │0 │

└──────────┴───────────┴───────────┴───────────┴─────────┘

-- 新增欄位後,插入資料,對新欄位賦值

-- Insert a row that also supplies a value for the new `history` column.
INSERT INTO city_all (fdate, city_code, city_name, total_cnt, history)
VALUES (20210131, 7000, 'dongguan', 85000, 2021);

-- 新增欄位後,查詢資料

-- Read all columns back through the distributed table.
SELECT *
FROM city_all;

┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐

│ 20210131 │4000 │ guangzhou │420000 │0 │

└──────────┴───────────┴───────────┴───────────┴─────────┘

┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐

│ 20210131 │7000 │ dongguan│85000 │2021 │

└──────────┴───────────┴───────────┴───────────┴─────────┘

┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐

│ 20210131 │5000 │ shenzhan│55000 │0 │

└──────────┴───────────┴───────────┴───────────┴─────────┘

┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐

│ 20210131 │7000 │ foshan│75000 │0 │

└──────────┴───────────┴───────────┴───────────┴─────────┘

-- UPDATE issued against the distributed table does NOT work; it fails with:
--   An error occurred before execution: Code: 371, e.displayText() = DB::Exception: Table 'city_all' isn't replicated, but shard #1 is replicated according to its cluster definition (version 20.2.1.2183 (official build))
-- Kept commented out, like the earlier failing examples on city_all, so the
-- script can be run from top to bottom.

-- ALTER TABLE city_all ON CLUSTER ck_cluster_name UPDATE total_cnt = 3333 WHERE city_name = 'foshan';

-- DELETE issued against the distributed table does NOT work either; same error:
--   An error occurred before execution: Code: 371, e.displayText() = DB::Exception: Table 'city_all' isn't replicated, but shard #1 is replicated according to its cluster definition (version 20.2.1.2183 (official build))

-- ALTER TABLE city_all ON CLUSTER ck_cluster_name DELETE WHERE city_name = 'foshan';

┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐

│ 20210131 │5000 │ shenzhan│55000 │0 │

└──────────┴───────────┴───────────┴───────────┴─────────┘

┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐

│ 20210131 │7000 │ dongguan│85000 │2021 │

└──────────┴───────────┴───────────┴───────────┴─────────┘

┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐

│ 20210131 │7000 │ foshan│75000 │0 │

└──────────┴───────────┴───────────┴───────────┴─────────┘

┌────fdate─┬─city_code─┬─city_name─┬─total_cnt─┬─history─┐

│ 20210131 │4000 │ guangzhou │420000 │0 │

└──────────┴───────────┴───────────┴───────────┴─────────┘

-- 刪除分散式表【生效】

-- Clean up on every node of the cluster.
-- The distributed table goes first so that city_all never points at a local
-- table that has already been removed; IF EXISTS keeps the cleanup re-runnable.
DROP TABLE IF EXISTS city_all ON CLUSTER ck_cluster_name;

DROP TABLE IF EXISTS city_local ON CLUSTER ck_cluster_name;

文章最後,給大家推薦一些受歡迎的技術部落格連結

  1. JAVA相關的深度技術部落格連結
  2. Flink 相關技術部落格連結
  3. Spark核心技術連結
  4. 設計模式 —— 深度技術部落格連結
  5. 機器學習 —— 深度技術部落格連結
  6. Hadoop相關技術部落格連結
  7. 超全乾貨--Flink思維導圖,花了3周左右編寫、校對
  8. 深入JAVA 的JVM核心原理解決線上各種故障【附案例】
  9. 請談談你對volatile的理解?--最近小李子與面試官的一場“硬核較量”
  10. 聊聊RPC通訊,經常被問到的一道面試題。原始碼+筆記,包懂
  11. 深入聊聊Java 垃圾回收機制【附原理圖及調優方法】

歡迎掃描下方的二維碼或 搜尋 公眾號“大資料高階架構師”,我們會有更多、且及時的資料推送給您,歡迎多多交流!