vertica系統管理語句 + vertica實時消費kafka
阿新 • 發佈:2018-12-17
--看鎖表及鎖的型別 select object_name,lock_mode,transaction_id,request_timestamp,transaction_description from locks; select transaction_id from locks where object_name like '%servefc%'; --查歷史查詢的記錄 select schema_name,table_name,user_name,query_type,is_executing,query_start from query_profiles; select *from query_profiles; select schema_name,table_name,user_name,query_type,is_executing,query_start from query_profiles where is_executing='t' --查詢執行次數做多的10個SQL語句 SELECT request,COUNT(*) FROM query_requests GROUP BY request ORDER BY COUNT(*) DESC LIMIT 10; --查詢執行時間最長的10個SQL SELECT request,request_duration_ms FROM query_requests ORDER BY request_duration_ms DESC LIMIT 10; --查詢memory消耗最多的10個SQL SELECT request,memory_acquired_mb FROM query_requests WHERE memory_acquired_mb IS NOT NULL ORDER BY memory_acquired_mb DESC LIMIT 10; --SESSION管理 select * from locks; --獲取transaction_id欄位 select * from sessions where transaction_id in(); --將上面獲取的transaction_id帶入,檢視transaction_start,判斷是否是以前鎖的 select CLOSE_SESSION ('sessionid' ) --帶入上面查出來的session_id --檢視造成死鎖的那個會話session有哪些歷史操作 select c.query,c.query_start from locks a left join sessions b on a.transaction_id=b.transaction_id left join query_profiles c on b.session_id=c.session_id where a.object_name like '%tb_dw_ct_cti_agent_call_list_min%'; --資源池 select * from RESOURCE_POOLS; --設定 SELECT GET_COMPLIANCE_STATUS(); -- 建立ODM使用者資源池,主要做外部表查詢 create resource pool pool_noas_odmMAXMEMORYSIZE '50%' EXECUTIONPARALLELISM AUTO PRIORITY 0 QUEUETIMEOUT NONEPLANNEDCONCURRENCY 12; -- 建立DW使用者資源池,主要做大表載入、關聯、彙總 create resource pool pool_noas_dwMAXMEMORYSIZE '90%' EXECUTIONPARALLELISM AUTO PRIORITY 0 QUEUETIMEOUT NONEPLANNEDCONCURRENCY AUTO; -- 建立APP使用者資源池,主要做查詢、表關聯、指標運算 create resource pool pool_noas_appMAXMEMORYSIZE '80%' EXECUTIONPARALLELISM AUTO PRIORITY 0 QUEUETIMEOUT NONEPLANNEDCONCURRENCY 24; CREATE USER "user_dw" WITH PASSWORD 'cmcc'; -- 資源池範例:低併發大查詢 CREATE RESOURCE POOL l_poolQUEUETIMEOUT NONE PLANNEDCONCURRENCY 6 MAXCONCURRENCY 4; -- 資源池範例:高發小查詢 CREATE 
RESOURCE POOL s_poolMEMORYSIZE '1G' EXECUTIONPARALLELISM 4 PRIORITY 10 QUEUETIMEOUT NONEPLANNEDCONCURRENCY 36 MAXCONCURRENCY 50; GRANT ALL ON RESOURCE POOL l_pool TO user_dw; ALTER USER user_dw RESOURCE POOLl_pool; GRANT ALL ON SCHEMA DW TO user_dw; -- 建立使用者 create user "dev_noas_odm"identified by 'noas_odm' resource pool pool_noas_odm; create user "dev_noas_dw"identified by 'noas_dw' resource pool pool_noas_dw; create user "dev_noas_app"identified by 'noas_app' resource pool pool_noas_app; -- 建立schema create schema if not existsnoas.noas_odm authorization dev_noas_odm; create schema if not existsnoas.noas_dw authorization dev_noas_dw; create schema if not existsnoas.noas_app authorization dev_noas_app; select start_timestamp, request_id, statement_id, request_type, substr(request,1,85), request_duration_ms from query_requests where request like '%prov_code as 220%' and request_type ='LOAD' order by start_timestamp desc limit 10; SELECT stream_name, schema_name, table_name, is_executing, accepted_row_count, rejected_row_count, DATEDIFF(ss,load_start::TIMESTAMP,GETDATE()::TIMESTAMP) AS DurationSec, ROUND((accepted_row_count+rejected_row_count)/DATEDIFF(ss,load_start::TIMESTAMP,GETDATE()::TIMESTAMP),3.0) AS RowsPerSec FROM load_streams WHERE is_executing='true' --看錶怎麼建的 select export_objects('','tb_dw_ct_tape_new_onest_day'); select node_name,storage_path,disk_space_free_percent from disk_storage where storage_path not ilike '%catalog%' order by disk_space_free_percent; wangguofei=> select table_schema,count(1) as cnt from tables where table_schema like '%csap%' or table_schema like '%huangzhan%' group by table_schema order by cnt desc; wangguofei=> select count(1) from tables where table_schema like '%csap%'; select substr(table_name,),count(1) as cnt from tables where table_schema like '%csap%' or table_schema like '%huangzhan%' group by table_schema order by cnt desc; select 
substr(table_name,regexp_instr(table_name,'_',1,3),regexp_instr(table_name,'_',1,4)-regexp_instr(table_name,'_',1,3)) , count(1) cnt from tables where table_schema like '%csap%' or table_schema like '%huangzhan%' and table_name like 'tb_' group by substr(table_name,regexp_instr(table_name,'_',1,3),regexp_instr(table_name,'_',1,4)-regexp_instr(table_name,'_',1,3)) order by cnt desc; select split_part() select count(1) from tables where table_name like '%rena%'; select sum(TABLE_SIZE_GB) from public.tb_wh_tableinfo_20180510 where table_name like '%inre%' or table_name like '%sqm%' or table_name like '%vona%' or table_name like '%qymn%' ; #!/bin/bash CurrentDir=`pwd` VSQL='/opt/vertica/bin/vsql -Udbadmin -wvertica11' DB_Name=CSAP_20_132 Logfile=${CurrentDir}/${DB_Name}_DB_`date "+%Y%m%d_%H%M%S"`.log $VSQL <<EOF |tee ${Logfile} -----------------------------echo -----------------------------echo >>> License狀態 select get_compliance_status() ; \\\-----------------------------echo >>> 磁碟空間檢查-檢查data目錄和catalog目錄使用情況 select node_name, storage_path, storage_usage, rank, disk_space_free_percent from disk_storage order by node_name; -----------------------------echo >>> 客戶端連線版本 select case when instr(client_label,'-')>0 then substr(client_label,1,instr(client_label,'-',1,2)) else client_label end client_label, count(*) from dc_session_starts group by 1 order by 2 desc; -----------------------------echo >>> 節點狀態 select node_name,last_msg_from_node_at ts, node_type, node_state, node_address,catalog_path from nodes order by node_name; -----------------------------echo >>> CATALOG與DATA目錄 select node_name, storage_path, storage_usage, rank, disk_space_free_percent from disk_storage order by node_name; -----------------------------echo >>> 資源池設定情況 select name,memorysize,maxmemorysize,plannedconcurrency ,maxconcurrency ,priority ,runtimepriority ,queuetimeout,runtimecap ,cascadeto from resource_pools; -----------------------------echo >>> 叢集catalog size select node_name,max(ts) as ts, 
max(catalog_size_in_MB) as catlog_size_in_MB from (select node_name,trunc((dc_allocation_pool_statistics_by_second."time")::TIMESTAMP,' SS'::VARCHAR(2)) AS ts, sum((dc_allocation_pool_statistics_by_second.total_memory_max_value - dc_allocation_pool_statistics_by_second.free_memory_min_value))/1024//102 4 AS catalog_size_in_MB from dc_allocation_pool_statistics_by_second group by 1,trunc((dc_allocation_pool_statistics_by_second."time")::TIMESTAMP,'SS'::VARCHAR(2))) fo o group by 1 order by 1; -----------------------------echo >>> 資料庫原始資料大小 select audit_start_timestamp ts,database_size_bytes/1024/1024//1024 as dbsize_gb,trunc(usage_percent,2) "use_per(%)" from license_audits where audited_data= 'Total' order by audit_start_timestamp desc limit 1; -----------------------------echo >>> 資料庫壓縮後資料大小 select sysdate ds,trunc(SUM(ps.wos_used_bytes+ps.ros_used_bytes)/1024/1024/1024::float) AS total_size_gb from projection_storage ps WHERE (ps.wos_used_bytes + ps.ros_u sed_bytes) > 0 group by 1; -----------------------------echo >>> 表分割槽數 select sysdate ds,table_schema,projection_name,count(distinct partition_key) partition_cnt, avg(ROS_ROW_COUNT) avg_rows from partitions group by 1,2,3 having count(dis tinct partition_key)>900 order by 4 desc,2 limit 10; -----------------------------echo >>> 緯度表或小表不合理分割槽 select distinct a.table_schema||'.'||t.table_name as table_name,a.is_segmented, substr(t.partition_expression,instr(t.partition_expression,'.')+1) partition_exp, a.par tition_cnt,a.rows_cnt,a.avg_partition_rows_cnt from (select pt.table_schema,pt.projection_name,pj.anchor_table_id,pj.is_segmented, count(distinct pt.partition_key) par tition_cnt, case when pj.is_segmented then sum(pt.ros_row_count) when not pj.is_segmented then sum(pt.ros_row_count)//count(distinct pt.node_name) end rows_cnt, (case when pj.is_segmented then sum(pt.ros_row_count) when not pj.is_segmented then sum(pt.ros_row_count)//count(distinct pt.node_name) end)//count(distinct pt.partition_key ) 
avg_partition_rows_cnt from partitions pt join projections pj using(projection_id) group by 1,2,3,4) a join tables t on t.table_id = a.anchor_table_id where a.rows_c nt < 10000000 order by a.avg_partition_rows_cnt,a.rows_cnt desc, partition_cnt desc limit 10; -----------------------------echo >>> 事實表不合理分割槽 select distinct a.table_schema||'.'||a.table_name as table_name,a.is_segmented, a.rows_cnt from (select t.table_schema,t.table_name,pj.projection_name,pj.is_segmented, case when pj.is_segmented then sum(pt.ros_row_count) when not pj.is_segmented then sum(pt.ros_row_count)//count(distinct pt.node_name) end rows_cnt from projections p j join tables t on pj.anchor_table_id = t.table_id join projection_storage pt using (projection_id) group by 1,2,3,4) a where a.rows_cnt >= 1000000 and not exists(sele ct 'x' from partitions p where p.projection_name = a.projection_name) order by a.rows_cnt desc limit 10; -----------------------------echo >>> 維度表或小表不做資料分片 select p.projection_Schema || '.' || p.anchor_Table_name, sum(ps.ros_row_count) from projections p, projection_storage ps where p.projection_name = ps.projection_name and ps.ros_row_count < 1000000 and p.is_segmented group by 1 order by 2 asc limit 10; -----------------------------echo >>> 事實表進行資料分片 select proj, row_count/(proj_count) as table_row_count from ( select p.projection_schema || '.' 
|| p.anchor_table_name as proj, sum(ps.ros_row_count) as row_Count, cou nt(distinct ps.projection_name) as proj_count from projections p, projection_storage ps where p.projection_name = ps.projection_name and p.projection_schema = ps.proje ction_schema and not p.is_segmented group by 1 ) pps where row_count/(proj_count) > 1000000 order by table_row_count desc limit 10; -----------------------------echo >>> 資料分佈傾斜 select projection,min_used_bytes//1024^3 min_used_GB, max_used_bytes//1024^3 max_used_GB,round(skew_pct::float,2) skew_pct from (select distinct trim(ps.projection) pr ojection, first_value(used_bytes) over (w order by used_bytes asc) as min_used_bytes, first_value(used_bytes) over (w order by used_bytes desc) as max_used_bytes, firs t_value(used_bytes) over (w order by used_bytes asc) /first_value(used_bytes) over (w order by used_bytes desc) as skew_pct from (select node_name, projection_id, proj ection_schema || '.' || projection_name as projection, sum(used_bytes) as used_bytes from projection_storage group by 1,2,3 ) as ps join projections p using (projectio n_id) where p.is_segmented and ps.used_bytes > 0 window w as (partition by ps.projection)) t where skew_pct< 0.8 order by 4 limit 10; -----------------------------echo >>> 表模型Projection個數 select t.table_schema,t.table_name, count(distinct p.projection_name) projection_cnt from tables t join projections p on t.table_id = p.anchor_table_id group by 1,2 having count(distinct p.projection_id)>10 order by 3 desc limit 10; -----------------------------echo >>> 每節點投影的ROS容器個數 select projection_name, node_name, sum(ros_count) as ros_cnt from projection_storage group by projection_name, node_name having sum(ros_count)>900 order by ros_cnt desc; -----------------------------echo >>> 未使用的投影 select anchor_table_name from projections where projection_name not in (select projection_name from projection_usage); -----------------------------echo >>> SQL執行類別統計 select query_type,case when query_duration_us < 1000000 
then 'A. sub-second' when query_duration_us between 1000000 and 3000000 then 'B. 1-3 seconds' when query_duration_us between 3000000 and 7000000 then 'C. 3-7 seconds' when query_duration_us between 7000000 and 15000000 then 'D. 7-15 seconds' when query_duration_us between 15000000 and 30000000 then 'E. 15-30 seconds' when query_duration_us between 30000000 and 60000000 then 'F. 30-60 seconds' when query_duration_us between 60000000 and 180000000 then 'G. 1-3 minutes' when query_duration_us between 180000000 and 600000000 then 'H. 3-10 minutes' when query_duration_us between 600000000 and 1800000000 then 'I. 10-30 minutes' when query_duration_us > 1800000000 then 'J. more than 30 minutes' end, count(*) from query_profiles group by 1,2 order by 1,2 asc ; -----------------------------echo >>> 大表統計 select projection_schema, anchor_table_name, to_char(sum(used_bytes)/1024/1024/1024,'999,999.99') as disk_space_used_gb from projection_storage group by projection_schema, anchor_table_name order by disk_space_used_gb desc limit 10; -----------------------------echo >>> Top SQL select query_duration_us, table_name, user_name, processed_row_count as rows_processed, substr(query, 0,70) from query_profiles order by query_duration_us desc limit 10; ---------------近12小時內平均執行時常------------------ select (now() - 1/24) from_date, now(), query_type, count(1) total_exec_sql, min(query_duration_us//1000) min_ms, max(query_duration_us//1000) max_ms, avg(query_duration_us//1000) avg_ms from query_profiles where query_start::timestamp > now() - 1/24 group by 1,2,3 order by 7 desc ; ------------------------------json 入庫vertica-------------------------------
[email protected]:[/home/dbadmin]cat json.dat { "name": "Everest", "type":"mountain", "height": 29029, "hike_safety": 34.1 } { "name": "Mt St Helens", "type": "volcano", "hike_safety": 15.4 } CREATE TABLE mountains(name varchar(64), type varchar(32), height integer); COPY mountains FROM local '/home/dbadmin/json.dat' WITH PARSER fjsonParser(); -- Compute New Values for the Target Table https://my.vertica.com/docs/8.1.x/HTML/index.htm#Authoring/AdministratorsGuide/BulkLoadCOPY/IgnoringColumnsAndFieldsInTheLoadFile.htm --------------------------入庫的同時,對資料進行轉換------------------------------------------- CREATE TABLE names(first_name VARCHAR(20), last_name VARCHAR(20), full_name VARCHAR(60)); CREATE TABLE => COPY names(first_name,middle_name FILLER VARCHAR(20),last_name,full_name AS first_name||' '||middle_name||' '||last_name) FROM STDIN; Enter data to be copied followed by a newline. End with a backslash and a period on a line by itself. --------------------例子---------------------------- copy tb_dw_ct_knba_klg_webpage_day_yzg (statis_date as to_char(to_timestamp(op_time/1000)::date,'yyyymmdd'),data_time,data_ip,data_type,sis_id ,prov_code,serial_num ,staff_id,call_num,op_time,title,url,refer, page_loadtime,dom_loadtime ,white_time ,konwledge_channel,knowledge_id ,event_value,call_bgntime ,call_endtime) from local '/data/interface/servefc/zhishikubak/test/page_zhishiku_2018-06-13_13079.txt' select distinct(substr(request,1,100)),request_duration_ms,start_timestamp from query_requests where request like '%select%'order by start_timestamp desc limit 50
vertica實時消費kafka入庫操作的sh指令碼配置:
# Vertica 8.1.0 kafka_config=" --config-schema kafka_date_dimension4 --dbhost 192.168.1.1 --username dbadmin --password xxxx" # shutdown instance /opt/vertica/packages/kafka/bin/vkconfig shutdown --instance-name kafka_date_dimension4 ${kafka_config} echo "Shutdown Instance Complete!" # truncate table $VSQL <<- EOF drop schema kafka_date_dimension4 cascade; truncate table csapsmpl.tb_svr_u_cntmng; EOF # Create and Configure Scheduler /opt/vertica/packages/kafka/bin/vkconfig scheduler --create --add ${kafka_config} --frame-duration '00:00:10' --eof-timeout-ms 3000 --operator dbadmin echo "Create and Configure Scheduler Complete!" # Create a Cluster /opt/vertica/packages/kafka/bin/vkconfig cluster --create --cluster kafka_cluster --hosts 192.168.125.199:6667,192.168.125.136:6667,192.168.125.110:6667 ${kafka_config} echo "Create Cluster Complete!" # Create a Data Table # Create a Source /opt/vertica/packages/kafka/bin/vkconfig source --create --source KAFKA_CSAP_SERVERCORE_USMPL_CNTMNGT_TOPIC --cluster kafka_cluster --partitions 1 ${kafka_config} echo "Create Kafka Source Complete!" # Create a Target /opt/vertica/packages/kafka/bin/vkconfig target --create --target-schema csapsmpl --target-table tb_svr_u_cntmng ${kafka_config} echo "Create Target Complete!" # Create a Load-Spec /opt/vertica/packages/kafka/bin/vkconfig load-spec --create --load-spec load_date_dimension_spec2 --parser KafkaJSONParser --parser-parameters flatten_arrays=False,flatten_maps=False ${kafka_config} #/opt/vertica/packages/kafka/bin/vkconfig load-spec --create --load-spec load_date_dimension_spec --parser KafkaJSONParser --filters "FILTER KafkaInsertDelimiters(delimiter=E'\n')" ${kafka_config} echo "Create Load-Spec Complete!" 
# Create a Microbatch /opt/vertica/packages/kafka/bin/vkconfig microbatch --create --microbatch tb_svr_u_cntmng --target-schema csapsmpl --target-table tb_svr_u_cntmng --rejection-schema csapsmpl --rejection-table tb_svr_u_cntmng_rej --load-spec load_date_dimension_spec2 --add-source KAFKA_CSAP_SERVERCORE_USMPL_CNTMNGT_TOPIC --add-source-cluster kafka_cluster ${kafka_config} echo "Create Microbatch Complete!" # Launch the Scheduler /opt/vertica/packages/kafka/bin/vkconfig launch --instance-name load_date_dimension_spec2 ${kafka_config} & echo "Launch the Scheduler Complete!" echo "Done!"