1. 程式人生 > >hive的基本語法操作

hive的基本語法操作

基本操作

DDL操作

一. 資料庫 -----1. 建立資料庫:預設儲存於HDFS的預設位置:/user/hive/warehouse/資料庫名.db

-- Create the hive database unless it is already present.
CREATE DATABASE IF NOT EXISTS hive;

-----2. 檢視資料庫

-- List every database.
SHOW DATABASES;

–如果資料庫非常多,可以使用萬用字元篩選(此處的 like 只支援 * 代表任意字元、| 代表「或」,並非完整的正則表示式),例如檢視以“h”開頭的資料庫:

-- List only the databases whose name starts with "h".
SHOW DATABASES LIKE 'h*';

-----3. 檢視資料庫資訊

-- Show the information recorded for the hive database.
DESCRIBE DATABASE hive;

-----4. 刪除資料庫

-- Drop the hive database if it exists.
DROP DATABASE IF EXISTS hive;
-- Force-drop a non-empty database: CASCADE removes the contained tables too.
-- (HiveQL comments start with "--"; a leading "#" is not a comment marker.)
DROP DATABASE IF EXISTS hive CASCADE;

二. 表

建立表

建立表之前,最好使用use 資料庫名;選擇資料庫,否則表會預設建立在default資料庫中;

–(1)建立內部表

-- Internal (managed) table for student rows.
-- * FIELDS TERMINATED BY sets the column delimiter. The default is ^A, so it
--   must match the delimiter of the uploaded file; the default row delimiter
--   is '\n'.
-- * STORED AS sets the storage format.
-- * No LOCATION clause is given, so the default storage path is used.
CREATE TABLE IF NOT EXISTS student (
    id   INT,
    name STRING,
    age  INT
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE;

–(2)建立外部表

-- External table; LOCATION names the storage path explicitly.
CREATE EXTERNAL TABLE IF NOT EXISTS stu_external2 (
    id   INT,
    name STRING,
    age  INT
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/shiny/hive.db/stu_external2';

–(3)建立分割槽表 – 一級分割槽

-- Single-level partitioned table.
-- PARTITIONED BY names the partition column; that column must not also be
-- declared in the regular column list.
CREATE TABLE IF NOT EXISTS stu_partition (
    id   INT,
    name STRING,
    age  INT
)
PARTITIONED BY (sex STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

– 二級分割槽

-- Two-level partitioned table: partitioned by classname, then by sex.
CREATE TABLE IF NOT EXISTS stu_partition2 (
    id   INT,
    name STRING,
    age  INT
)
PARTITIONED BY (classname STRING, sex STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

—插入女分割槽資料------

-- Load the local female.txt file into the sex='female' partition of each table.
LOAD DATA LOCAL INPATH '/home/shiny/Desktop/data/female.txt'
INTO TABLE stu_partition
PARTITION (sex = 'female');

LOAD DATA LOCAL INPATH '/home/shiny/Desktop/data/female.txt'
INTO TABLE stu_partition2
PARTITION (classname = '1101', sex = 'female');

—插入男分割槽資料------

-- Load the local male.txt file into the sex='male' partition of each table.
LOAD DATA LOCAL INPATH '/home/shiny/Desktop/data/male.txt'
INTO TABLE stu_partition
PARTITION (sex = 'male');

LOAD DATA LOCAL INPATH '/home/shiny/Desktop/data/male.txt'
INTO TABLE stu_partition2
PARTITION (classname = '1101', sex = 'male');

----查詢表的所有資料------

-- Read every row of the partitioned table, across all partitions.
SELECT *
FROM stu_partition;

----查詢表分割槽-----------

-- List the partitions of stu_partition.
SHOW PARTITIONS stu_partition;

–(4)建立分桶表

-- Bucketed table.
-- CLUSTERED BY names the bucketing column, SORTED BY the sort column, and
-- INTO n BUCKETS the number of buckets.
CREATE TABLE IF NOT EXISTS stu_buck (
    id   INT,
    name STRING,
    age  INT
)
CLUSTERED BY (id) SORTED BY (id DESC) INTO 3 BUCKETS
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

–插入資料

-- Fill the bucketed table from student, distributing by id and sorting by id descending.
INSERT INTO TABLE stu_buck
SELECT *
FROM student
DISTRIBUTE BY (id)
SORT BY (id DESC);

------2. 修改表 –(1)重新命名表

-- Rename the student table to stu_internal.
ALTER TABLE student
RENAME TO stu_internal;

–(2)增加列

-- Append an address column to stu_partition.
ALTER TABLE stu_partition
ADD COLUMNS (address STRING);

—檢視表結構-----

-- Show the column layout of stu_partition.
DESCRIBE stu_partition;

—檢視表結構詳細資訊

desc formatted stu_partition;

–(3)改變列

alter table stu_partition change id number string;

–(4)替換/刪除列

alter table stu_partition replace columns(id int,name string,age int);

–(5)新增分割槽------

alter table stu_partition add partition(sex='weizhi');

–(6)刪除分割槽------

alter table stu_partition drop partition(sex='weizhi');

------3. 刪除表

drop table if exists stu_external2;

–顯示當前資料庫中所有的表

show tables;

---------1. Load裝載資料------------------- –(1)載入本地資料(複製資料)

load data local inpath '/home/shiny/Desktop/data/female.txt' into table stu_internal;

–(2)載入HDFS資料(移動資料)

load data inpath '/data/male.txt' into table stu_internal;

–(3)載入本地資料覆蓋表中內容

load data local inpath '/home/shiny/Desktop/data/female.txt' overwrite into table stu_internal;

---------2. INSERT插入資料----------------- –(1)單條插入(一般不使用)

insert into table stu_internal values(1116,'bob',23);

–(2)利用查詢語句將結果匯入新表(新表必須事先手動建立) – 複製表(只是複製現有的表結構,不複製資料)

create table student like stu_internal;

– 將資料匯入新表

-- Copy the rows with age >= 23 from stu_internal into student.
-- Exactly one keyword must be used: OVERWRITE replaces the rows already in
-- student, INTO appends to them. "overwrite/into" written literally is not
-- valid HiveQL.
INSERT OVERWRITE TABLE student
SELECT *
FROM stu_internal
WHERE age >= 23;

–(3)多重插入(新表事先建立)

-- Create the second target table used by the multi-insert example.
CREATE TABLE stu_insert (
    id   INT,
    name STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

-- Multi-insert: one FROM clause feeds two target tables.
-- Rows with age < 23 go to student; (id, name) of every row goes to stu_insert.
FROM stu_internal
INSERT INTO TABLE student
    SELECT *
    WHERE age < 23
INSERT INTO TABLE stu_insert
    SELECT id, name;

–(4)CTAS(create table … as select …)(新表不用事先手動建立)如果select語句查詢由於某種原因而失敗,新表是不會建立的。

-- CTAS: create stu_ctas from the id and age of stu_internal rows with age < 23.
CREATE TABLE stu_ctas AS
SELECT
    id,
    age
FROM stu_internal
WHERE age < 23;

-----3. INSERT匯出資料(注意是overwrite,不能使用into)----------------- –(1)單模式匯出:匯出到本地(^A(ctrl+A)為列分隔符,\n為行分隔符)

-- Export every row of student to a directory on the local filesystem.
INSERT OVERWRITE LOCAL DIRECTORY '/home/shiny/Desktop/data/student'
SELECT *
FROM student;

–(2)單模式匯出:匯出到HDFS(^A(ctrl+A)為列分隔符,\n為行分隔符)

-- Export every row of student to the /student directory (no LOCAL keyword, so not the local filesystem).
INSERT OVERWRITE DIRECTORY '/student'
SELECT *
FROM student;

----4. SELECT查詢資料--------------------- –建立表

-- Score table used by the SELECT examples: one row per (id, name, course, score).
CREATE TABLE IF NOT EXISTS score (
    id     INT,
    name   STRING,
    course STRING,
    score  INT
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

-- Load the local score.txt file into the score table.
LOAD DATA LOCAL INPATH '/home/shiny/Desktop/data/score.txt'
INTO TABLE score;

–(1)GROUP BY:查詢每位學生總成績 – 注意:在Group by子句中,Select查詢的列,要麼需要是Group by中的列,要麼得是用聚合函式(比如sum、count等)加工過的列。不支援直接引用非Group by的列。

-- Total score per student. The output column is named "count" although it holds a sum.
SELECT
    id,
    name,
    SUM(score) AS count
FROM score
GROUP BY id, name;

–(2)ORDER BY:獲取全級總成績最高的學生資訊(全域性排序)預設是升序排序asc

-- The student with the highest total score: order the totals descending and keep the first row.
SELECT
    id,
    name,
    SUM(score) AS count
FROM score
GROUP BY id, name
ORDER BY count DESC
LIMIT 1;

–(3)SORT BY: 查詢學生資訊,按照id降序排序(區域性排序) –設定reduce的個數為2

-- Run with two reducers, then build stu_sort from student using SORT BY id descending.
SET mapreduce.job.reduces=2;
CREATE TABLE stu_sort AS
SELECT *
FROM student
SORT BY id DESC;

–(4)先對age進行降序排序,age相同的情況下對id進行降序排序 –DISTRIBUTE BY + SORT BY:分桶和排序的組合操作,對id進行分桶,對age,id進行降序排序

-- Turn on bucketing enforcement.
SET hive.enforce.bucketing = true;
-- Number of reduce tasks, which is also the number of buckets.
SET mapreduce.job.reduces=3;
-- Export student to a local directory, distributed by id and sorted by age, then id, both descending.
INSERT OVERWRITE LOCAL DIRECTORY '/home/shiny/Desktop/data/distr'
SELECT *
FROM student
DISTRIBUTE BY (id)
SORT BY (age DESC, id DESC);

–(5)對id進行分桶,對id進行升序排序 – CLUSTER BY:分桶和排序的組合操作,等於DISTRIBUTE BY + SORT BY(前提:分桶和SORT欄位是同一個)。

-- Export student to a local directory using CLUSTER BY,
-- which is equivalent to DISTRIBUTE BY id SORT BY id.
INSERT OVERWRITE LOCAL DIRECTORY '/home/shiny/Desktop/data/cluster'
SELECT *
FROM student
CLUSTER BY (id);

join連線

-- Inner join: only rows that satisfy the join condition.
SELECT *
FROM studenta a
JOIN studentb b
    ON a.id = b.id;

-- Left outer join: the left table is the base; unmatched right-side columns are NULL.
SELECT *
FROM studenta a
LEFT JOIN studentb b
    ON a.id = b.id;

-- Right outer join: the right table is the base.
SELECT *
FROM studenta a
RIGHT JOIN studentb b
    ON a.id = b.id;

-- Full outer join: both tables are the base; unmatched rows from either side are kept.
-- The left table is studenta, as in the other examples (the original read "student").
SELECT *
FROM studenta a
FULL JOIN studentb b
    ON a.id = b.id;

-- Left semi join: returns only left-table data for rows that found a match.
SELECT *
FROM studenta a
LEFT SEMI JOIN studentb b
    ON a.id = b.id;