PG copy&insert效能對比
阿新 • • 發佈:2018-11-28
目錄
測試環境
Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz
32G memory
SSD harddisk.
postgresql.conf
max_connections = 256 shared_buffers = 3GB # min 128kB work_mem = 16MB # min 64kB maintenance_work_mem = 256MB # min 1MB wal_buffers = 4MB checkpoint_timeout = 10min # range 30s-1d max_wal_size = 2GB checkpoint_completion_target = 0.9
表結構
zj_testdb=# \d+ t1 Table "public.t1" Column | Type | Collation | Nullable | Default | Storage | Stats target | Description --------+--------+-----------+----------+---------+---------+--------------+------------- c1 | bigint | | | | plain | | c2 | bigint | | | | plain | | c3 | bigint | | | | plain | | c4 | bigint | | | | plain | | c5 | bigint | | | | plain | | c6 | bigint | | | | plain | | c7 | bigint | | | | plain | | c8 | bigint | | | | plain | | c9 | bigint | | | | plain | | c10 | bigint | | | | plain | | c11 | bigint | | | | plain | | c12 | bigint | | | | plain | | c13 | bigint | | | | plain | | c14 | bigint | | | | plain | | c15 | bigint | | | | plain | | c16 | bigint | | | | plain | | c17 | bigint | | | | plain | | c18 | bigint | | | | plain | | c19 | bigint | | | | plain | | c20 | bigint | | | | plain | | c21 | bigint | | | | plain | | c22 | bigint | | | | plain | | c23 | bigint | | | | plain | | c24 | bigint | | | | plain | | c25 | bigint | | | | plain | | c26 | bigint | | | | plain | | c27 | bigint | | | | plain | | c28 | bigint | | | | plain | | c29 | bigint | | | | plain | | c30 | bigint | | | | plain | | c31 | bigint | | | | plain | | c32 | bigint | | | | plain | | c33 | bigint | | | | plain | | c34 | bigint | | | | plain | | c35 | bigint | | | | plain | | c36 | bigint | | | | plain | | c37 | bigint | | | | plain | | c38 | bigint | | | | plain | | c39 | bigint | | | | plain | | c40 | bigint | | | | plain | | c41 | bigint | | | | plain | | c42 | bigint | | | | plain | | c43 | bigint | | | | plain | | c44 | bigint | | | | plain | | c45 | bigint | | | | plain | | c46 | bigint | | | | plain | | c47 | bigint | | | | plain | | c48 | bigint | | | | plain | | c49 | bigint | | | | plain | | c50 | bigint | | | | plain | | c51 | bigint | | | | plain | | c52 | bigint | | | | plain | | c53 | bigint | | | | plain | | c54 | bigint | | | | plain | | c55 | bigint | | | | plain | | c56 | bigint | | | | plain | | c57 | bigint | | | | plain | | c58 | bigint | | | | plain | | c59 | bigint | | | | plain | | c60 | bigint | | | | plain | | c61 | bigint | | | | plain | | c62 | bigint | | | | plain | | c63 | bigint | | | | plain | | c64 | bigint | | | | plain | | c65 | bigint | | | | plain | | c66 | bigint | | | | plain | | c67 | bigint | | | | plain | | c68 | bigint | | | | plain | | c69 | bigint | | | | plain | | c70 | bigint | | | | plain | | c71 | bigint | | | | plain | | c72 | bigint | | | | plain | | c73 | bigint | | | | plain | | c74 | bigint | | | | plain | | c75 | bigint | | | | plain | | c76 | bigint | | | | plain | | c77 | bigint | | | | plain | | c78 | bigint | | | | plain | | c79 | bigint | | | | plain | | c80 | bigint | | | | plain | | c81 | bigint | | | | plain | | c82 | bigint | | | | plain | | c83 | bigint | | | | plain | | c84 | bigint | | | | plain | | c85 | bigint | | | | plain | | c86 | bigint | | | | plain | | c87 | bigint | | | | plain | | c88 | bigint | | | | plain | | c89 | bigint | | | | plain | | c90 | bigint | | | | plain | | c91 | bigint | | | | plain | | c92 | bigint | | | | plain | | c93 | bigint | | | | plain | | c94 | bigint | | | | plain | | c95 | bigint | | | | plain | | c96 | bigint | | | | plain | | c97 | bigint | | | | plain | | c98 | bigint | | | | plain | | c99 | bigint | | | | plain | | c100 | bigint | | | | plain | | Tablespace: "zj_tablespace"
CASE1
batchsize=100, 8 threads
test_insert.sql
[[email protected] zjcktest]# cat test_insert.sql insert into t1 select generate_series(1,100),1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1
copy csv
copy (select generate_series(1,100),1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1) to '/data/zjcktest/csv';
copy.sql
[[email protected] zjcktest]# cat copy.sql
copy t1 from '/data/zjcktest/csv' ;
pgbench命令執行insert和copy
INSERT:
pgbench -M prepared -n -r -P 1 -f /data/zjcktest/test_insert.sql -h 127.0.0.1 -p 5432 -U postgres zj_testdb -c 8 -j 8 -T 120
COPY:
pgbench -M prepared -n -r -P 1 -f /data/zjcktest/copy.sql -h 127.0.0.1 -p 5432 -U postgres zj_testdb -c 8 -j 8 -T 120
結果
兩者速率相差不大,copy略快5%
8 threads batchsize=100 | 行數 | 事務數 | TPS |
batch 100 insert select | 12022100 | 120221 | 1000.835 |
batch 100 copy | 12771100 | 127711 | 1057.615 |
batch 100 insert select | 12945500 | 129455 | 1077.917 |
batch 100 copy | 13552200 | 135522 | 1120.751 |
batch 100 insert select | 13164900 | 131649 | 1092.143 |
batch 100 copy | 13299600 | 132996 | 1107.717 |
CASE 2
1 thread, batchsize=100
修改pgbench執行命令
INSERT:
pgbench -M prepared -n -r -P 1 -f /data/zjcktest/test_insert.sql -h 127.0.0.1 -p 5432 -U postgres zj_testdb -c 1 -j 1 -T 120
COPY:
pgbench -M prepared -n -r -P 1 -f /data/zjcktest/copy.sql -h 127.0.0.1 -p 5432 -U postgres zj_testdb -c 1 -j 1 -T 120
結果
比8執行緒慢25%。 copy和insert很接近,insert比copy略好3%。
1 thread, batchsize=100 | 行數 | 事務數 | TPS |
batch 100 insert select | 9003100 | 90031 | 750.254 |
batch 100 copy | 8903700 | 89037 | 741.454 |
batch 100 insert select | 8971300 | 89713 | 747.577 |
batch 100 copy | 8798700 | 87987 | 733.224 |
batch 100 insert select | 8839900 | 88399 | 736.622 |
batch 100 copy | 8776300 | 87763 | 731.356 |
batch 100 insert select | 9042900 | 90429 | 753.572 |
batch 100 copy | 8827400 | 88274 | 735.607 |
CASE 3
1 thread, batchsize=1000
修改test_insert.sql和生成新的copy檔案
copy (select generate_series(1,1000),1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1) to '/data/zjcktest/csv';
[[email protected] zjcktest]# cat test_insert.sql
insert into t1 select generate_series(1,1000),1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1
結果
仍舊insert稍快 4%。
1 thread, batchsize=1000 | 行數 | 事務數 | TPS |
batch 100 insert select | 11179000 | 11179 | 93 |
batch 100 copy | 10794000 | 10794 | 89.948 |
TPS修正
考慮到copy需要讀取檔案,按照金士頓 SSD 讀取500MB/s來計算
總讀取位元組 | 讀取速率 | cost time | 修正後TPS |
2179233042 | 524288000 | 4.16 s | 93.18 |
去掉讀檔案的時間,總的TPS和insert幾乎一致。
CASE 4
1 thread, batchsize = 1
修改test_insert.sql 和 重新生成copy檔案csv
cat test_insert.sql
insert into t1 values (5,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1)
zj_testdb=# copy (select 5,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1) to '/data/zjcktest/csv';
結果
insert要比copy快 15%.
由於單行頻繁開啟關閉檔案控制代碼導致copy效能下降。
1 thread, batchsize=1 | 行數 | 事務數 | TPS |
insert | 835632 | 835632 | 6963.591 |
copy | 696941 | 696941 | 5807.831 |
結論
在test_insert.sql中,使用generate_series 而不是顯示一行一行insert語句的情況下,insert 整體效能和copy基本一致,單執行緒情況下insert效能更好。
對比test_insert.sql裡有100行insert語句的測試,這裡少了絕大部分的SQL語句的解析,效能與copy一樣甚至超過。