Apache Hawq功能測試腳本
1.下載工具TPC-H
下載地址
2. 拷貝生成makefile,並修改makefile
/opt/tpc-h-v2.17.0/dbgen
cp makefile.suite makefile
修改makefile
3.編譯
make -f makefile
4.生成數據
dbgen -v -U 1 -s 1
./dbgen
5.創建數據庫以及相關表格語句
--create database tpch; \c tpch; --1. region drop table if exists region; create table region( r_regionkey integer, r_name char(25), r_comment varchar(152), r_extra char(1) )with(appendonly=true,orientation=parquet,compresstype=snappy) distributed by(r_regionkey); --2. nation drop table if exists nation; create table nation( n_nationkey integer, n_name char(25), n_regionkey integer, n_comment varchar(152), n_extra char(1) )with(appendonly=true,orientation=parquet,compresstype=snappy) distributed by(n_nationkey); --3.part drop table if exists part; create table part( p_partkey bigint, p_name varchar(55), p_mfgr char(25), p_brand char(10), p_type varchar(25), p_size integer, p_container varchar(10), p_retailprice decimal, p_comment varchar(117), p_extra char(1) )with(appendonly=true,orientation=parquet,compresstype=snappy) distributed by (p_partkey); --4.supplier drop table if exists supplier; create table supplier( s_suppkey bigint, s_name char(25), s_address varchar(40), s_nationkey int, s_phone char(15), s_acctbal decimal, s_comment varchar(101), r_extra char(1) )with(appendonly=true,orientation=parquet,compresstype=snappy) distributed by (s_suppkey); --5.partsupp drop table if exists partsupp; create table partsupp( ps_partkey bigint, ps_suppkey bigint, ps_availqty integer, ps_supplycost decimal, ps_comment varchar(199), ps_extra char(1) )with(appendonly=true,orientation=parquet,compresstype=snappy) distributed by(ps_partkey); #6.customer drop table if exists customer; create table customer( c_custkey bigint, c_name char(25), c_address char(40), c_nationkey integer, c_phone char(15), c_acctbal decimal, c_mktsegment char(10), c_comment varchar(117), c_extra char(1) )with(appendonly=true,orientation=parquet,compresstype=snappy) distributed by (c_custkey); --7.orders drop table if exists orders; create table orders( o_orderkey bigint, o_custkey bigint, o_orderstatus char(1), o_totalprice decimal, o_orderdate date, o_orderpriority char(15), o_clerk char(15), o_shippriority integer, o_comment varchar(117), o_extra char(1) )with(appendonly=true,orientation=parquet,compresstype=snappy) distributed by (o_orderkey ); --8.lineitem drop table if exists lineitem; create table lineitem( l_orderkey bigint, l_partkey bigint, l_suppkey bigint, l_linenumber integer, l_quantity decimal, l_extendedprice decimal, l_discount decimal, l_tax decimal, l_returnflag char(1), l_linestatus char(1), l_shipdate date, l_commitdate date, l_receiptdate date, l_shipinstruct char(25), l_shipmode char(10), l_comment varchar(117), l_extra char(1) )with(appendonly=true,orientation=parquet,compresstype=snappy) distributed by (l_linenumber);
6.查詢語句在queries目錄下 22個
二、創建表空間
1. 創建文件空間配置文件,在master節點上執行
$hawq filespace -o tpc_h_config
文件內容如下:
filespace:fs_tpc_h
fsreplica:3
dfs_url::mycluster/hawq_fs/fs_tpc_h
2. 創建HDFS目錄
$hdfs dfs -mkdir /hawq_fs
$hdfs dfs -chown gpadmin:gpadmin /hawq_fs
$hdfs dfs -ls /
3.創建文件空間
$hawq filespace -c tpc_h_config
4.創建表空間,psql
create tablespace ts_tpc_h filespace fs_tpc_h;
5.刪除文件空間和表空間
表空間擁有者可以刪除,且不能有其他數據對象(如數據庫,數據表)使用當前表空間,不能有表空間使用當前文件空間,才可以刪除。
6. 查看當前所有表空間
SELECT spcname AS tblspc, fsname AS filespc, fsedbid AS seg_dbid, fselocation AS datadir FROM pg_tablespace pgts, pg_filespace pgfs, pg_filespace_entry pgfse WHERE pgts.spcfsoid=pgfse.fsefsoid AND pgfse.fsefsoid=pgfs.oid ORDER BY tblspc, seg_dbid;
三、創建數據庫和Schema
1. 創建數據庫
create database tpc_h with template template0 tablespace ts_tpc_h;
\c tpc_h;
2.創建schema
create schema extschema;
3. 查看當前schema,函數current_schema()
select current_schema();
4. 查看當前Schema的搜索路徑
show search_path;
5. 指定schema創建數據表
create external table extschema.region(
r_regionkey integer,
r_name char(25),
r_comment varchar(152),
r_extra char(1)
)location(‘gpfdist://10.110.17.104:8081/region.tbl‘)
format ‘text‘ (delimiter ‘|‘ null ‘‘ escape ‘OFF‘)
encoding ‘UTF8‘;
6. 修改SCHEMA的搜索路徑
ALTER DATABASE tpc_h SET search_path TO extschema, public;
退出,重啟進入psql客戶端
四、創建數據表
執行第一章中的腳本。
五、創建視圖
drop view if exists v_order_customer ;
create view v_order_customer
as select o.o_orderkey, o.o_custkey, c.c_name,o.o_orderdate
from orders o join customer c on o.o_custkey=c.c_custkey
where o.o_orderdate>‘1997-01-01‘;
select * from v_order_customer limit 10;
select * from v_order_customer where o_orderdate<=‘1997-01-01‘;
六、 使用gpfdist加載數據
1. 安裝需要的rpm包
yum install -y libevent libyaml apr
2.啟動gpfdist文件服務器
gpfdist -d /tmp/data -p 8081 -l ~/log &
3.創建外部表格
--1. region
drop external table if exists ext_region;
create external table ext_region(
r_regionkey integer,
r_name char(25),
r_comment varchar(152),
r_extra char(1)
)location(‘gpfdist://10.110.17.104:8081/region.tbl‘)
format ‘text‘ (delimiter ‘|‘ null ‘‘ escape ‘OFF‘)
encoding ‘UTF8‘;
--2. nation
drop external table if exists ext_nation;
create external table ext_nation(
n_nationkey integer,
n_name char(25),
n_regionkey integer,
n_comment varchar(152),
n_extra char(1)
)location(‘gpfdist://10.110.17.104:8081/nation.tbl‘)
format ‘text‘ (delimiter ‘|‘ null ‘‘ escape ‘OFF‘)
encoding ‘UTF8‘;
--3.part
drop external table if exists ext_part;
create external table ext_part(
p_partkey bigint,
p_name varchar(55),
p_mfgr char(25),
p_brand char(10),
p_type varchar(25),
p_size integer,
p_container varchar(10),
p_retailprice decimal,
p_comment varchar(117),
p_extra char(1)
)location(‘gpfdist://10.110.17.104:8081/part.tbl‘)
format ‘text‘ (delimiter ‘|‘ null ‘‘ escape ‘OFF‘)
encoding ‘UTF8‘;
--4.supplier
drop external table if exists ext_supplier;
create external table ext_supplier(
s_suppkey bigint,
s_name char(25),
s_address varchar(40),
s_nationkey int,
s_phone char(15),
s_acctbal decimal,
s_comment varchar(101),
s_extra char(1)
)location(‘gpfdist://10.110.17.104:8081/supplier.tbl‘)
format ‘text‘ (delimiter ‘|‘ null ‘‘ escape ‘OFF‘)
encoding ‘UTF8‘;
--5.partsupp
drop external table if exists ext_partsupp;
create external table ext_partsupp(
ps_partkey bigint,
ps_suppkey bigint,
ps_availqty integer,
ps_supplycost decimal,
ps_comment varchar(199),
ps_extra char(1)
)location(‘gpfdist://10.110.17.104:8081/partsupp.tbl‘)
format ‘text‘ (delimiter ‘|‘ null ‘‘ escape ‘OFF‘)
encoding ‘UTF8‘;
#6.customer
drop external table if exists ext_customer;
create external table ext_customer(
c_custkey bigint,
c_name char(25),
c_address char(40),
c_nationkey integer,
c_phone char(15),
c_acctbal decimal,
c_mktsegment char(10),
c_comment varchar(117),
c_extra char(1)
)location(‘gpfdist://10.110.17.104:8081/customer.tbl‘)
format ‘text‘ (delimiter ‘|‘ null ‘‘ escape ‘OFF‘)
encoding ‘UTF8‘;
--7.orders
drop external table if exists ext_orders;
create external table ext_orders(
o_orderkey bigint,
o_custkey bigint,
o_orderstatus char(1),
o_totalprice decimal,
o_orderdate date,
o_orderpriority char(15),
o_clerk char(15),
o_shippriority integer,
o_comment varchar(117),
o_extra char(1)
)location(‘gpfdist://10.110.17.104:8081/orders.tbl‘)
format ‘text‘ (delimiter ‘|‘ null ‘‘ escape ‘OFF‘)
encoding ‘UTF8‘;
--8.lineitem
drop external table if exists ext_lineitem;
create external table ext_lineitem(
l_orderkey bigint,
l_partkey bigint,
l_suppkey bigint,
l_linenumber integer,
l_quantity decimal,
l_extendedprice decimal,
l_discount decimal,
l_tax decimal,
l_returnflag char(1),
l_linestatus char(1),
l_shipdate date,
l_commitdate date,
l_receiptdate date,
l_shipinstruct char(25),
l_shipmode char(10),
l_comment varchar(117),
l_extra char(1)
)location(‘gpfdist://10.110.17.104:8081/lineitem.tbl‘)
format ‘text‘ (delimiter ‘|‘ null ‘‘ escape ‘OFF‘)
encoding ‘UTF8‘;
handling errors:
drop external table if exists ext_region1;
create external table ext_region1(
r_regionkey integer,
r_name char(25),
r_comment varchar(152),
r_extra char(1)
)location(‘gpfdist://10.110.17.104:8081/region.tbl‘)
format ‘text‘ (delimiter ‘|‘ null ‘‘ escape ‘OFF‘)
encoding ‘UTF8‘
log errors into errortable segment reject limit 10 rows;
測試是否可以正常讀取數據:
select * from ext_region limit 10;
select * from ext_nation limit 10;
select * from ext_part limit 10;
select * from ext_supplier limit 10;
select * from ext_partsupp limit 10;
select * from ext_customer limit 10;
select * from ext_orders limit 10;
select * from ext_lineitem limit 10;
4. 加載數據
insert into region select * from ext_region;
insert into nation select * from ext_nation;
insert into part select * from ext_part;
insert into supplier select * from ext_supplier;
insert into partsupp select * from ext_partsupp;
insert into customer select * from ext_customer;
insert into orders select * from ext_orders;
insert into lineitem select * from ext_lineitem;
測試是否可以正常讀取數據:
select from region limit 10;
select from nation limit 10;
select from part limit 10;
select from supplier limit 10;
select from partsupp limit 10;
select from customer limit 10;
select from orders limit 10;
select from lineitem limit 10;
七.使用copy加載數據
1. 指定的目錄master節點必須可以訪問到
copy region from ‘/tmp/data/region.tbl‘
with delimiter ‘|‘;
--log errors into errtable segment reject limit 10 rows;
八.hawq load工具程序加載數據
1. 在gpfdist節點上修改 /home/gpadmin/.bash_profile
在文件末尾增加如下內容: source /usr/local/hawq/greenplum_path.sh
2.sudo yum install -y libevent libyaml apr
3. 創建表格
create table audit(cmd varchar(10), t timestamp);
4. 啟動gpfdist文件服務器
gpfdist -p 8081 -d /tmp/data -l ~/log &
5.新增配置文件load.yaml,內容如下:
---
VERSION: 1.0.0.1
DATABASE: tpc_h
USER: gpadmin
HOST: master1
PORT: 5432
GPLOAD:
INPUT:
- SOURCE:
LOCAL_HOSTNAME:
- worker1
PORT: 8081
FILE:
- region.tbl
- COLUMNS:
- r_regionkey: integer
- r_name: text
- r_comment: text
- r_extra: text
- FORMAT: text
- DELIMITER: ‘|‘
- ERROR_LIMIT: 25
- ERROR_TABLE: public.errortable
OUTPUT:
- TABLE: public.region
- MODE: INSERT
SQL:
- BEFORE: "INSERT INTO audit VALUES(‘start‘, current_timestamp)"
- AFTER: "INSERT INTO audit VALUES(‘end‘, current_timestamp)"
4. 執行hawq load命令
hawq load -f load.yaml
九. PXF插件加載HDFS數據
1.創建外部表
drop external table if exists pxf_hdfs_textsimple;
create external table pxf_hdfs_textsimple(location text, month text, num_orders int, total_sales float8)
location (‘pxf://mycluster/hawq_fs/pxf_data/pxf_hdfs_simple.txt?profile=hdfstextsimple‘)
format ‘text‘ (delimiter=e‘,‘);
//namenode單點時:master1.bigdata是指namenode節點,端口是pxf的端口地址
create external table pxf_hdfs_textsimple(location text, month text, num_orders int, total_sales float8)
location (‘pxf://master1.bigdata:51200/hawq_fs/pxf_data/pxf_hdfs_simple.txt?profile=hdfstextsimple‘)
format ‘text‘ (delimiter=e‘,‘);
自定義空值:
drop external table if exists h.pxf_inventory;
create external table h.pxf_inventory
(
inv_date_sk integer,
inv_item_sk integer,
inv_warehouse_sk integer,
inv_quantity_on_hand integer,
inv_null varchar(10)
)location (‘pxf://master1.bigdata:51200/tpcdsdata/30T/inventory/data-m-*?profile=hdfstextsimple‘)
format ‘TEXT‘ (DELIMITER ‘|‘ NULL ‘‘);
2. 創建pxf_hdfs_simple.txt文件,內容:
echo ‘Prague,Jan,101,4875.33
Rome,Mar,87,1557.39
Bangalore,May,317,8936.99
Beijing,Jul,411,11600.67‘ > /tmp/pxf_hdfs_simple.txt
3.文件上傳至hdfs
hdfs dfs -put /tmp/pxf_hdfs_simple.txt /hawq_fs/pxf_data/
hdfs dfs -cat /hawq_fs/pxf_data/pxf_hdfs_simple.txt
select * from pxf_hdfs_textsimple;
十. PXF插件加載HIVE數據
1.創建hawq數據表
drop table if exists salesinfo;
create table salesinfo(location text, month text, num_orders int, total_sales float8)
with(appendonly=true,orientation=parquet,compresstype=snappy)
distributed randomly;
1. hive中創建表sales_info
/usr/hdp/current/hive-client/bin
./hive
drop table if exists sales_info;
create table sales_info (location string, month string,
number_of_orders int, total_sales double)
row format delimited fields terminated by ‘,‘
stored as textfile;
Prague,Jan,101,4875.33
Rome,Mar,87,1557.39
Bangalore,May,317,8936.99
Beijing,Jul,411,11600.67
San Francisco,Sept,156,6846.34
Paris,Nov,159,7134.56
San Francisco,Jan,113,5397.89
Prague,Dec,333,9894.77
Bangalore,Jul,271,8320.55
Beijing,Dec,100,4248.41
2. load 數據到hive數據表中
load data local inpath ‘/tmp/pxf_hive_datafile.txt‘
into table sales_info;
方法一:創建外部表
psql
1.創建外部表方式
drop table if exists salesinfo_hiveprofile;
create external table salesinfo_hiveprofile(location text, month text, num_orders int, total_sales float8)
location (‘pxf://mycluster/default.sales_info?profile=hive‘)
format ‘custom‘ (formatter=‘pxfwritable_import‘);
2. 導入數據
insert into salesinfo select * from salesinfo_hiveprofile;
方法二:讀取hive Hcatalog 元數據服務
這種方式需要修改pxf-profiles.xml中關於Hive的內容,增加outputformat對象
HiveText:
<outputformat>org.apache.hawq.pxf.service.io.Text</outputformat>
HiveORC:
<outputformat>org.apache.hawq.pxf.service.io.GPDBWritable</outputformat>
HiveRC:
<outputformat>org.apache.hawq.pxf.service.io.Text</outputformat>
Hive
<outputformat>org.apache.hawq.pxf.service.io.GPDBWritable</outputformat>
格式:```
SELECT * FROM hcatalog.hive-db-name.hive-table-name;
如:
```SELECT * FROM hcatalog.default.sales_info;
導入數據:
insert into salesinfo SELECT * FROM hcatalog.default.sales_info;
十一. PXF插件加載HBASE數據
--方式1,直接引用列族和Qualifier
-
創建外部表
drop EXTERNAL table if exists hbase_sales;
create external table hbase_sales(
recordkey bytea,
"cf1:saleid" varchar,
"cf8:comments" varchar)
location (‘pxf://mycluster/sales?profile=hbase‘)
format ‘custom‘(formatter=‘pxfwritable_import‘)
encoding ‘utf8‘; - 使用HBase Shell創建HBase數據表,並寫入數據
cd /usr/hdp/current/hbase-client/bin
./hbase shell
創建表格
create ‘sales‘, {NAME=>‘cf1‘,VERSION=>2},{NAME=>‘cf8‘,VERSION=>2}
#create ‘pxf_hbase_region‘, {NAME=>‘cf1‘,VERSION=>2}
寫入數據
put ‘sales‘,‘rk001‘,‘cf1:saleid‘, ‘s001‘
put ‘sales‘,‘rk001‘,‘cf8:comments‘, ‘comments1‘
select * from hbase_sales;
--方式2,在HBase中創建pxflookup表
create ‘pxflookup‘, {NAME=>‘mapping‘,VERSION=>2}
put ‘pxflookup‘, ‘sales‘, ‘mapping:id‘, ‘cf1:saleid‘
put ‘pxflookup‘, ‘sales‘, ‘mapping:cmts‘, ‘cf8:comments‘
drop EXTERNAL table if exists pxf_hbase_sales ;
CREATE EXTERNAL TABLE pxf_hbase_sales (
recordkey bytea,
id varchar,
cmts varchar
)location (‘pxf://mycluster/sales?profile=hbase‘)
format ‘custom‘(formatter=‘pxfwritable_import‘)
encoding ‘utf8‘;
select * from pxf_hbase_sales;
十二. PXF訪問Jdbc(mysql)
grant all privileges on . to "pxf"@"%" identified by ‘test‘;
mysql> use test;
mysql> create table myclass(
id int(4) not null primary key,
name varchar(20) not null,
gender int(4) not null default ‘0‘,
degree double(16,2));
insert into myclass values(1,"tom",1,90);
insert into myclass values(2,‘john‘,0,94);
insert into myclass values(3,‘simon‘,1,79);
/etc/pxf/conf/pxf-public.classpath添加
/usr/lib/pxf/mysql-connector-java-*.jar
gpadmin=#
drop external table if exists jdbc_myclass;
CREATE EXTERNAL TABLE jdbc_myclass(id integer,
name text,
gender integer,
degree float8)
LOCATION (‘pxf://localhost:51200/test.myclass‘
‘?PROFILE=JDBC‘
‘&JDBC_DRIVER=com.mysql.jdbc.Driver‘
‘&DB_URL=jdbc:mysql://10.110.22.191:3306/test&USER=pxf&PASS=test‘
)
FORMAT ‘CUSTOM‘ (Formatter=‘pxfwritable_import‘);
select * from jdbc_myclass;
slaes_info:
create table sales_info (location varchar(200), month varchar(10),
number_of_orders int, total_sales double);
insert into sales_info values("Prague","Jan",101,4875.33),
("Rome","Mar",87,1557.39),
("Bangalore","May",317,8936.99),
("Beijing","Jul",411,11600.67),
("San Francisco","Sept",156,6846.34),
("Paris","Nov",159,7134.56),
("San Francisco","Jan",113,5397.89),
("Prague","Dec",333,9894.77),
("Bangalore","Jul",271,8320.55),
("Beijing","Dec",100,4248.41);
psql:
drop external table if exists jdbc_sales_info ;
CREATE EXTERNAL TABLE jdbc_sales_info (location varchar(200), month varchar(10),
number_of_orders integer, total_sales float8)
LOCATION (‘pxf://localhost:51200/test.sales_info‘
‘?PROFILE=JDBC‘
‘&JDBC_DRIVER=com.mysql.jdbc.Driver‘
‘&DB_URL=jdbc:mysql://10.110.22.191:3306/test&USER=pxf&PASS=test‘
)
FORMAT ‘CUSTOM‘ (Formatter=‘pxfwritable_import‘);
CREATE EXTERNAL TABLE sales(id integer,
cdate date,
amt float8,
grade text)
LOCATION (‘pxf://localhost:51200/sales‘
‘?PROFILE=JDBC‘
‘&JDBC_DRIVER=com.mysql.jdbc.Driver‘
‘&DB_URL=jdbc:mysql://192.168.200.6:3306/demodb&USER=root&PASS=root‘
‘&PARTITION_BY=cdate:date&RANGE=2008-01-01:2010-01-01&INTERVAL=1:year‘
)
FORMAT ‘CUSTOM‘ (Formatter=‘pxfwritable_import‘);
十三. gpfdist卸載數據
方法一: gpfdist 定義基於文件的可寫外部表
- 定義可寫外部表
create writable external table unload_region
(like region)
location (‘gpfdist://10.110.17.104:8081/exp_region.tbl‘)
format ‘text‘ (delimiter ‘,‘);
--只可寫,不可讀
-
導入數據
insert into unload_region select * from region; - 查看導出的數據文件exp_region.tbl
cat exp_region.tbl
方法二 使用copy卸載數據
copy region
to ‘/home/gpadmin/region.out‘;
copy (select * from region where r_regionkey=1)
to ‘/home/gpadmin/region.out‘;
十三. PXF插件寫數據到HDFS
1.創建可寫外部表
create writable external table pxf_hdfs_writabletbl_1(location text, month text, num_orders int, total_sales float8)
location (‘pxf://mycluster/hawq_fs/pxf_data/pxfwritable_hdfs_textsimple1?profile=hdfstextsimple‘)
format ‘text‘ (delimiter=e‘,‘);
2.寫入數據
insert into pxf_hdfs_writabletbl_1 values ( ‘frankfurt‘, ‘mar‘, 777, 3956.98 );
insert into pxf_hdfs_writabletbl_1 values ( ‘cleveland‘, ‘oct‘, 3812, 96645.37 );
3.只可寫不可讀
Apache Hawq功能測試腳本