HIVE 分割槽表 分桶表
//分割槽表,優化手段之一,從目錄的層面控制搜尋資料的範圍。 //建立分割槽表. $hive>CREATE TABLE t3(id int,name string,age int) PARTITIONED BY (Year INT, Month INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ;
//顯示表的分割槽資訊 $hive>SHOW PARTITIONS t3;
//新增分割槽,建立目錄 $hive>alter table t3 add partition (year=2014, month=12); //刪除分割槽 hive>ALTER TABLE t3 DROP IF EXISTS PARTITION (year=2014, month=11);
//分割槽結構(每個分割槽對應HDFS上的一個目錄,以下是目錄路徑,不是hive命令) /user/hive/warehouse/mydb2.db/t3/year=2014/month=11 /user/hive/warehouse/mydb2.db/t3/year=2014/month=12
//載入資料到分割槽表 hive>load data local inpath '/home/centos/customers.txt' into table t3 partition(year=2014,month=11);
//建立桶表 $hive>CREATE TABLE t4(id int,name string,age int) CLUSTERED BY (id) INTO 3 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ; ******
CLUSTERED BY (m) INTO n BUCKETS 按照m欄位分成n個桶
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' 用“,”作為欄位分隔符(每行資料按“,”切分成各個欄位)
****** //載入資料不會進行分桶操作 $hive>load data local inpath '/home/centos/customers.txt' into table t4 ; //查詢t3表資料插入到t4中。 $hive>insert into t4 select id,name,age from t3 ;
//桶表的數量如何設定? //評估資料量,保證每個桶的資料量約為block大小的2倍。